1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 #include "petscsys.h" /*I "petscmat.h" I*/ 11 12 /* 13 Special version for Fun3d sequential benchmark 14 */ 15 #if defined(PETSC_HAVE_FORTRAN_CAPS) 16 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 17 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 18 #define matsetvaluesblocked4_ matsetvaluesblocked4 19 #endif 20 21 EXTERN_C_BEGIN 22 #undef __FUNCT__ 23 #define __FUNCT__ "matsetvaluesblocked4_" 24 void matsetvaluesblocked4_(Mat *AA,int *mm,const int im[],int *nn,const int in[],const PetscScalar v[]) 25 { 26 Mat A = *AA; 27 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 28 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 29 int *ai=a->i,*ailen=a->ilen; 30 int *aj=a->j,stepval; 31 const PetscScalar *value = v; 32 MatScalar *ap,*aa = a->a,*bap; 33 34 PetscFunctionBegin; 35 stepval = (n-1)*4; 36 for (k=0; k<m; k++) { /* loop over added rows */ 37 row = im[k]; 38 rp = aj + ai[row]; 39 ap = aa + 16*ai[row]; 40 nrow = ailen[row]; 41 low = 0; 42 for (l=0; l<n; l++) { /* loop over added columns */ 43 col = in[l]; 44 value = v + k*(stepval+4)*4 + l*4; 45 low = 0; high = nrow; 46 while (high-low > 7) { 47 t = (low+high)/2; 48 if (rp[t] > col) high = t; 49 else low = t; 50 } 51 for (i=low; i<high; i++) { 52 if (rp[i] > col) break; 53 if (rp[i] == col) { 54 bap = ap + 16*i; 55 for (ii=0; ii<4; ii++,value+=stepval) { 56 for (jj=ii; jj<16; jj+=4) { 57 bap[jj] += *value++; 58 } 59 } 60 goto noinsert2; 61 } 62 } 63 N = nrow++ - 1; 64 /* shift up all the later entries in this row */ 65 for (ii=N; ii>=i; ii--) { 66 rp[ii+1] = rp[ii]; 67 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 68 } 69 if (N >= i) { 70 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 71 } 72 rp[i] = col; 73 bap = ap + 16*i; 74 for (ii=0; ii<4; ii++,value+=stepval) { 75 for (jj=ii; jj<16; jj+=4) { 76 bap[jj] = *value++; 77 } 78 } 79 noinsert2:; 80 low = i; 81 } 82 ailen[row] = nrow; 83 } 84 } 85 EXTERN_C_END 86 87 #if defined(PETSC_HAVE_FORTRAN_CAPS) 88 #define matsetvalues4_ MATSETVALUES4 89 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 90 #define matsetvalues4_ matsetvalues4 91 #endif 92 93 EXTERN_C_BEGIN 94 #undef __FUNCT__ 95 #define __FUNCT__ "MatSetValues4_" 96 void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v) 97 { 98 Mat A = *AA; 99 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 100 int *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 101 int *ai=a->i,*ailen=a->ilen; 102 int *aj=a->j,brow,bcol; 103 int ridx,cidx; 104 MatScalar *ap,value,*aa=a->a,*bap; 105 106 PetscFunctionBegin; 107 for (k=0; k<m; k++) { /* loop over added rows */ 108 row = im[k]; brow = row/4; 109 rp = aj + ai[brow]; 110 ap = aa + 16*ai[brow]; 111 nrow = ailen[brow]; 112 low = 0; 113 for (l=0; l<n; l++) { /* loop over added columns */ 114 col = in[l]; bcol = col/4; 115 ridx = row % 4; cidx = col % 4; 116 value = v[l + k*n]; 117 low = 0; high = nrow; 118 while (high-low > 7) { 119 t = (low+high)/2; 120 if (rp[t] > bcol) high = t; 121 else low = t; 122 } 123 for (i=low; i<high; i++) { 124 if (rp[i] > bcol) break; 125 if (rp[i] == bcol) { 126 bap = ap + 16*i + 4*cidx + ridx; 127 *bap += value; 128 goto noinsert1; 129 } 130 } 131 N = nrow++ - 1; 132 /* shift up all the later entries in this row */ 133 for (ii=N; ii>=i; ii--) { 134 rp[ii+1] = rp[ii]; 135 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 136 } 137 if (N>=i) { 138 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 139 } 140 rp[i] = bcol; 141 ap[16*i + 4*cidx + ridx] = value; 142 noinsert1:; 143 low = i; 144 } 145 ailen[brow] = nrow; 146 } 147 } 148 EXTERN_C_END 149 150 /* UGLY, ugly, ugly 151 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 152 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 153 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 154 converts the entries into single precision and then calls ..._MatScalar() to put them 155 into the single precision data structures. 156 */ 157 #if defined(PETSC_USE_MAT_SINGLE) 158 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 159 #else 160 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 161 #endif 162 #if defined(PETSC_HAVE_DSCPACK) 163 EXTERN int MatUseDSCPACK_MPIBAIJ(Mat); 164 #endif 165 166 #define CHUNKSIZE 10 167 168 /* 169 Checks for missing diagonals 170 */ 171 #undef __FUNCT__ 172 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 173 int MatMissingDiagonal_SeqBAIJ(Mat A) 174 { 175 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 176 int *diag,*jj = a->j,i,ierr; 177 178 PetscFunctionBegin; 179 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 180 diag = a->diag; 181 for (i=0; i<a->mbs; i++) { 182 if (jj[diag[i]] != i) { 183 SETERRQ1(1,"Matrix is missing diagonal number %d",i); 184 } 185 } 186 PetscFunctionReturn(0); 187 } 188 189 #undef __FUNCT__ 190 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 191 int MatMarkDiagonal_SeqBAIJ(Mat A) 192 { 193 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 194 int i,j,*diag,m = a->mbs,ierr; 195 196 PetscFunctionBegin; 197 if (a->diag) PetscFunctionReturn(0); 198 199 ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 200 PetscLogObjectMemory(A,(m+1)*sizeof(int)); 201 for (i=0; i<m; i++) { 202 diag[i] = a->i[i+1]; 203 for (j=a->i[i]; j<a->i[i+1]; j++) { 204 if (a->j[j] == i) { 205 diag[i] = j; 206 break; 207 } 208 } 209 } 210 a->diag = diag; 211 PetscFunctionReturn(0); 212 } 213 214 215 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 216 217 #undef __FUNCT__ 218 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 219 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 220 { 221 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 222 int ierr,n = a->mbs,i; 223 224 PetscFunctionBegin; 225 *nn = n; 226 if (!ia) PetscFunctionReturn(0); 227 if (symmetric) { 228 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 229 } else if (oshift == 1) { 230 /* temporarily add 1 to i and j indices */ 231 int nz = a->i[n]; 232 for (i=0; i<nz; i++) a->j[i]++; 233 for (i=0; i<n+1; i++) a->i[i]++; 234 *ia = a->i; *ja = a->j; 235 } else { 236 *ia = a->i; *ja = a->j; 237 } 238 239 PetscFunctionReturn(0); 240 } 241 242 #undef __FUNCT__ 243 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 244 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 245 { 246 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 247 int i,n = a->mbs,ierr; 248 249 PetscFunctionBegin; 250 if (!ia) PetscFunctionReturn(0); 251 if (symmetric) { 252 ierr = PetscFree(*ia);CHKERRQ(ierr); 253 ierr = PetscFree(*ja);CHKERRQ(ierr); 254 } else if (oshift == 1) { 255 int nz = a->i[n]-1; 256 for (i=0; i<nz; i++) a->j[i]--; 257 for (i=0; i<n+1; i++) a->i[i]--; 258 } 259 PetscFunctionReturn(0); 260 } 261 262 #undef __FUNCT__ 263 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 264 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 265 { 266 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 267 268 PetscFunctionBegin; 269 *bs = baij->bs; 270 PetscFunctionReturn(0); 271 } 272 273 274 #undef __FUNCT__ 275 #define __FUNCT__ "MatDestroy_SeqBAIJ" 276 int MatDestroy_SeqBAIJ(Mat A) 277 { 278 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 279 int ierr; 280 281 PetscFunctionBegin; 282 #if defined(PETSC_USE_LOG) 283 PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 284 #endif 285 ierr = PetscFree(a->a);CHKERRQ(ierr); 286 if (!a->singlemalloc) { 287 ierr = PetscFree(a->i);CHKERRQ(ierr); 288 ierr = PetscFree(a->j);CHKERRQ(ierr); 289 } 290 if (a->row) { 291 ierr = ISDestroy(a->row);CHKERRQ(ierr); 292 } 293 if (a->col) { 294 ierr = ISDestroy(a->col);CHKERRQ(ierr); 295 } 296 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 297 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 298 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 299 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 300 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 301 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 302 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 303 #if defined(PETSC_USE_MAT_SINGLE) 304 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 305 #endif 306 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 307 308 ierr = PetscFree(a);CHKERRQ(ierr); 309 PetscFunctionReturn(0); 310 } 311 312 #undef __FUNCT__ 313 #define __FUNCT__ "MatSetOption_SeqBAIJ" 314 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 315 { 316 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 317 318 PetscFunctionBegin; 319 switch (op) { 320 case MAT_ROW_ORIENTED: 321 a->roworiented = PETSC_TRUE; 322 break; 323 case MAT_COLUMN_ORIENTED: 324 a->roworiented = PETSC_FALSE; 325 break; 326 case MAT_COLUMNS_SORTED: 327 a->sorted = PETSC_TRUE; 328 break; 329 case MAT_COLUMNS_UNSORTED: 330 a->sorted = PETSC_FALSE; 331 break; 332 case MAT_KEEP_ZEROED_ROWS: 333 a->keepzeroedrows = PETSC_TRUE; 334 break; 335 case MAT_NO_NEW_NONZERO_LOCATIONS: 336 a->nonew = 1; 337 break; 338 case MAT_NEW_NONZERO_LOCATION_ERR: 339 a->nonew = -1; 340 break; 341 case MAT_NEW_NONZERO_ALLOCATION_ERR: 342 a->nonew = -2; 343 break; 344 case MAT_YES_NEW_NONZERO_LOCATIONS: 345 a->nonew = 0; 346 break; 347 case MAT_ROWS_SORTED: 348 case MAT_ROWS_UNSORTED: 349 case MAT_YES_NEW_DIAGONALS: 350 case MAT_IGNORE_OFF_PROC_ENTRIES: 351 case MAT_USE_HASH_TABLE: 352 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 353 break; 354 case MAT_NO_NEW_DIAGONALS: 355 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 356 default: 357 SETERRQ(PETSC_ERR_SUP,"unknown option"); 358 } 359 PetscFunctionReturn(0); 360 } 361 362 #undef __FUNCT__ 363 #define __FUNCT__ "MatGetRow_SeqBAIJ" 364 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 365 { 366 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 367 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 368 MatScalar *aa,*aa_i; 369 PetscScalar *v_i; 370 371 PetscFunctionBegin; 372 bs = a->bs; 373 ai = a->i; 374 aj = a->j; 375 aa = a->a; 376 bs2 = a->bs2; 377 378 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 379 380 bn = row/bs; /* Block number */ 381 bp = row % bs; /* Block Position */ 382 M = ai[bn+1] - ai[bn]; 383 *nz = bs*M; 384 385 if (v) { 386 *v = 0; 387 if (*nz) { 388 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 389 for (i=0; i<M; i++) { /* for each block in the block row */ 390 v_i = *v + i*bs; 391 aa_i = aa + bs2*(ai[bn] + i); 392 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 393 } 394 } 395 } 396 397 if (idx) { 398 *idx = 0; 399 if (*nz) { 400 ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 401 for (i=0; i<M; i++) { /* for each block in the block row */ 402 idx_i = *idx + i*bs; 403 itmp = bs*aj[ai[bn] + i]; 404 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 405 } 406 } 407 } 408 PetscFunctionReturn(0); 409 } 410 411 #undef __FUNCT__ 412 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 413 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 414 { 415 int ierr; 416 417 PetscFunctionBegin; 418 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 419 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 420 PetscFunctionReturn(0); 421 } 422 423 #undef __FUNCT__ 424 #define __FUNCT__ "MatTranspose_SeqBAIJ" 425 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 426 { 427 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 428 Mat C; 429 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 430 int *rows,*cols,bs2=a->bs2; 431 PetscScalar *array; 432 433 PetscFunctionBegin; 434 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 435 ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 436 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 437 438 #if defined(PETSC_USE_MAT_SINGLE) 439 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 440 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 441 #else 442 array = a->a; 443 #endif 444 445 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 446 ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 447 ierr = PetscFree(col);CHKERRQ(ierr); 448 ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 449 cols = rows + bs; 450 for (i=0; i<mbs; i++) { 451 cols[0] = i*bs; 452 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 453 len = ai[i+1] - ai[i]; 454 for (j=0; j<len; j++) { 455 rows[0] = (*aj++)*bs; 456 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 457 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 458 array += bs2; 459 } 460 } 461 ierr = PetscFree(rows);CHKERRQ(ierr); 462 #if defined(PETSC_USE_MAT_SINGLE) 463 ierr = PetscFree(array); 464 #endif 465 466 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 467 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 468 469 if (B) { 470 *B = C; 471 } else { 472 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 473 } 474 PetscFunctionReturn(0); 475 } 476 477 #undef __FUNCT__ 478 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 479 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 480 { 481 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 482 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 483 PetscScalar *aa; 484 FILE *file; 485 486 PetscFunctionBegin; 487 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 488 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 489 col_lens[0] = MAT_FILE_COOKIE; 490 491 col_lens[1] = A->m; 492 col_lens[2] = A->n; 493 col_lens[3] = a->nz*bs2; 494 495 /* store lengths of each row and write (including header) to file */ 496 count = 0; 497 for (i=0; i<a->mbs; i++) { 498 for (j=0; j<bs; j++) { 499 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 500 } 501 } 502 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 503 ierr = PetscFree(col_lens);CHKERRQ(ierr); 504 505 /* store column indices (zero start index) */ 506 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 507 count = 0; 508 for (i=0; i<a->mbs; i++) { 509 for (j=0; j<bs; j++) { 510 for (k=a->i[i]; k<a->i[i+1]; k++) { 511 for (l=0; l<bs; l++) { 512 jj[count++] = bs*a->j[k] + l; 513 } 514 } 515 } 516 } 517 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 518 ierr = PetscFree(jj);CHKERRQ(ierr); 519 520 /* store nonzero values */ 521 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 522 count = 0; 523 for (i=0; i<a->mbs; i++) { 524 for (j=0; j<bs; j++) { 525 for (k=a->i[i]; k<a->i[i+1]; k++) { 526 for (l=0; l<bs; l++) { 527 aa[count++] = a->a[bs2*k + l*bs + j]; 528 } 529 } 530 } 531 } 532 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 533 ierr = PetscFree(aa);CHKERRQ(ierr); 534 535 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 536 if (file) { 537 fprintf(file,"-matload_block_size %d\n",a->bs); 538 } 539 PetscFunctionReturn(0); 540 } 541 542 extern int MatMPIBAIJFactorInfo_DSCPACK(Mat,PetscViewer); 543 544 #undef __FUNCT__ 545 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 546 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 547 { 548 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 549 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 550 PetscViewerFormat format; 551 552 PetscFunctionBegin; 553 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 554 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 555 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 556 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 557 Mat aij; 558 ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr); 559 ierr = MatView(aij,viewer);CHKERRQ(ierr); 560 ierr = MatDestroy(aij);CHKERRQ(ierr); 561 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 562 #if defined(PETSC_HAVE_DSCPACK) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX) 563 ierr = MatMPIBAIJFactorInfo_DSCPACK(A,viewer);CHKERRQ(ierr); 564 #endif 565 PetscFunctionReturn(0); 566 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 567 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 568 for (i=0; i<a->mbs; i++) { 569 for (j=0; j<bs; j++) { 570 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 571 for (k=a->i[i]; k<a->i[i+1]; k++) { 572 for (l=0; l<bs; l++) { 573 #if defined(PETSC_USE_COMPLEX) 574 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 575 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l, 576 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 577 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 578 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l, 579 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 580 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 581 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 582 } 583 #else 584 if (a->a[bs2*k + l*bs + j] != 0.0) { 585 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 586 } 587 #endif 588 } 589 } 590 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 591 } 592 } 593 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 594 } else { 595 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 596 for (i=0; i<a->mbs; i++) { 597 for (j=0; j<bs; j++) { 598 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 599 for (k=a->i[i]; k<a->i[i+1]; k++) { 600 for (l=0; l<bs; l++) { 601 #if defined(PETSC_USE_COMPLEX) 602 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 603 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 604 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 605 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 606 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 607 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 608 } else { 609 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 610 } 611 #else 612 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 613 #endif 614 } 615 } 616 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 617 } 618 } 619 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 620 } 621 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 622 PetscFunctionReturn(0); 623 } 624 625 #undef __FUNCT__ 626 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 627 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 628 { 629 Mat A = (Mat) Aa; 630 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 631 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 632 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 633 MatScalar *aa; 634 PetscViewer viewer; 635 636 PetscFunctionBegin; 637 638 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 639 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 640 641 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 642 643 /* loop over matrix elements drawing boxes */ 644 color = PETSC_DRAW_BLUE; 645 for (i=0,row=0; i<mbs; i++,row+=bs) { 646 for (j=a->i[i]; j<a->i[i+1]; j++) { 647 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 648 x_l = a->j[j]*bs; x_r = x_l + 1.0; 649 aa = a->a + j*bs2; 650 for (k=0; k<bs; k++) { 651 for (l=0; l<bs; l++) { 652 if (PetscRealPart(*aa++) >= 0.) continue; 653 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 654 } 655 } 656 } 657 } 658 color = PETSC_DRAW_CYAN; 659 for (i=0,row=0; i<mbs; i++,row+=bs) { 660 for (j=a->i[i]; j<a->i[i+1]; j++) { 661 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 662 x_l = a->j[j]*bs; x_r = x_l + 1.0; 663 aa = a->a + j*bs2; 664 for (k=0; k<bs; k++) { 665 for (l=0; l<bs; l++) { 666 if (PetscRealPart(*aa++) != 0.) continue; 667 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 668 } 669 } 670 } 671 } 672 673 color = PETSC_DRAW_RED; 674 for (i=0,row=0; i<mbs; i++,row+=bs) { 675 for (j=a->i[i]; j<a->i[i+1]; j++) { 676 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 677 x_l = a->j[j]*bs; x_r = x_l + 1.0; 678 aa = a->a + j*bs2; 679 for (k=0; k<bs; k++) { 680 for (l=0; l<bs; l++) { 681 if (PetscRealPart(*aa++) <= 0.) continue; 682 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 683 } 684 } 685 } 686 } 687 PetscFunctionReturn(0); 688 } 689 690 #undef __FUNCT__ 691 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 692 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 693 { 694 int ierr; 695 PetscReal xl,yl,xr,yr,w,h; 696 PetscDraw draw; 697 PetscTruth isnull; 698 699 PetscFunctionBegin; 700 701 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 702 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 703 704 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 705 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 706 xr += w; yr += h; xl = -w; yl = -h; 707 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 708 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 709 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 710 PetscFunctionReturn(0); 711 } 712 713 #undef __FUNCT__ 714 #define __FUNCT__ "MatView_SeqBAIJ" 715 int MatView_SeqBAIJ(Mat A,PetscViewer viewer) 716 { 717 int ierr; 718 PetscTruth issocket,isascii,isbinary,isdraw; 719 720 PetscFunctionBegin; 721 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 722 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 723 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 724 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 725 if (issocket) { 726 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 727 } else if (isascii){ 728 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 729 } else if (isbinary) { 730 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 731 } else if (isdraw) { 732 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 733 } else { 734 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 735 } 736 PetscFunctionReturn(0); 737 } 738 739 740 #undef __FUNCT__ 741 #define __FUNCT__ "MatGetValues_SeqBAIJ" 742 int MatGetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[]) 743 { 744 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 745 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 746 int *ai = a->i,*ailen = a->ilen; 747 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 748 MatScalar *ap,*aa = a->a,zero = 0.0; 749 750 PetscFunctionBegin; 751 for (k=0; k<m; k++) { /* loop over rows */ 752 row = im[k]; brow = row/bs; 753 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 754 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 755 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 756 nrow = ailen[brow]; 757 for (l=0; l<n; l++) { /* loop over columns */ 758 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 759 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 760 col = in[l] ; 761 bcol = col/bs; 762 cidx = col%bs; 763 ridx = row%bs; 764 high = nrow; 765 low = 0; /* assume unsorted */ 766 while (high-low > 5) { 767 t = (low+high)/2; 768 if (rp[t] > bcol) high = t; 769 else low = t; 770 } 771 for (i=low; i<high; i++) { 772 if (rp[i] > bcol) break; 773 if (rp[i] == bcol) { 774 *v++ = ap[bs2*i+bs*cidx+ridx]; 775 goto finished; 776 } 777 } 778 *v++ = zero; 779 finished:; 780 } 781 } 782 PetscFunctionReturn(0); 783 } 784 785 #if defined(PETSC_USE_MAT_SINGLE) 786 #undef __FUNCT__ 787 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 788 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 789 { 790 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 791 int ierr,i,N = m*n*b->bs2; 792 MatScalar *vsingle; 793 794 PetscFunctionBegin; 795 if (N > b->setvalueslen) { 796 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 797 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 798 b->setvalueslen = N; 799 } 800 vsingle = b->setvaluescopy; 801 for (i=0; i<N; i++) { 802 vsingle[i] = v[i]; 803 } 804 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 805 PetscFunctionReturn(0); 806 } 807 #endif 808 809 810 #undef __FUNCT__ 811 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 812 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode is) 813 { 814 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 815 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 816 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 817 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 818 PetscTruth roworiented=a->roworiented; 819 const MatScalar *value = v; 820 MatScalar *ap,*aa = a->a,*bap; 821 822 PetscFunctionBegin; 823 if (roworiented) { 824 stepval = (n-1)*bs; 825 } else { 826 stepval = (m-1)*bs; 827 } 828 for (k=0; k<m; k++) { /* loop over added rows */ 829 row = im[k]; 830 if (row < 0) continue; 831 #if defined(PETSC_USE_BOPT_g) 832 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1); 833 #endif 834 rp = aj + ai[row]; 835 ap = aa + bs2*ai[row]; 836 rmax = imax[row]; 837 nrow = ailen[row]; 838 low = 0; 839 for (l=0; l<n; l++) { /* loop over added columns */ 840 if (in[l] < 0) continue; 841 #if defined(PETSC_USE_BOPT_g) 842 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],a->nbs-1); 843 #endif 844 col = in[l]; 845 if (roworiented) { 846 value = v + k*(stepval+bs)*bs + l*bs; 847 } else { 848 value = v + l*(stepval+bs)*bs + k*bs; 849 } 850 if (!sorted) low = 0; high = nrow; 851 while (high-low > 7) { 852 t = (low+high)/2; 853 if (rp[t] > col) high = t; 854 else low = t; 855 } 856 for (i=low; i<high; i++) { 857 if (rp[i] > col) break; 858 if (rp[i] == col) { 859 bap = ap + bs2*i; 860 if (roworiented) { 861 if (is == ADD_VALUES) { 862 for (ii=0; ii<bs; ii++,value+=stepval) { 863 for (jj=ii; jj<bs2; jj+=bs) { 864 bap[jj] += *value++; 865 } 866 } 867 } else { 868 for (ii=0; ii<bs; ii++,value+=stepval) { 869 for (jj=ii; jj<bs2; jj+=bs) { 870 bap[jj] = *value++; 871 } 872 } 873 } 874 } else { 875 if (is == ADD_VALUES) { 876 for (ii=0; ii<bs; ii++,value+=stepval) { 877 for (jj=0; jj<bs; jj++) { 878 *bap++ += *value++; 879 } 880 } 881 } else { 882 for (ii=0; ii<bs; ii++,value+=stepval) { 883 for (jj=0; jj<bs; jj++) { 884 *bap++ = *value++; 885 } 886 } 887 } 888 } 889 goto noinsert2; 890 } 891 } 892 if (nonew == 1) goto noinsert2; 893 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 894 if (nrow >= rmax) { 895 /* there is no extra room in row, therefore enlarge */ 896 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 897 MatScalar *new_a; 898 899 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 900 901 /* malloc new storage space */ 902 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 903 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 904 new_j = (int*)(new_a + bs2*new_nz); 905 new_i = new_j + new_nz; 906 907 /* copy over old data into new slots */ 908 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 909 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 910 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 911 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 912 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 913 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 914 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 915 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 916 /* free up old matrix storage */ 917 ierr = PetscFree(a->a);CHKERRQ(ierr); 918 if (!a->singlemalloc) { 919 ierr = PetscFree(a->i);CHKERRQ(ierr); 920 ierr = PetscFree(a->j);CHKERRQ(ierr); 921 } 922 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 923 a->singlemalloc = PETSC_TRUE; 924 925 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 926 rmax = imax[row] = imax[row] + CHUNKSIZE; 927 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 928 a->maxnz += bs2*CHUNKSIZE; 929 a->reallocs++; 930 a->nz++; 931 } 932 N = nrow++ - 1; 933 /* shift up all the later entries in this row */ 934 for (ii=N; ii>=i; ii--) { 935 rp[ii+1] = rp[ii]; 936 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 937 } 938 if (N >= i) { 939 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 940 } 941 rp[i] = col; 942 bap = ap + bs2*i; 943 if (roworiented) { 944 for (ii=0; ii<bs; ii++,value+=stepval) { 945 for (jj=ii; jj<bs2; jj+=bs) { 946 bap[jj] = *value++; 947 } 948 } 949 } else { 950 for (ii=0; ii<bs; ii++,value+=stepval) { 951 for (jj=0; jj<bs; jj++) { 952 *bap++ = *value++; 953 } 954 } 955 } 956 noinsert2:; 957 low = i; 958 } 959 ailen[row] = nrow; 960 } 961 PetscFunctionReturn(0); 962 } 963 964 #undef __FUNCT__ 965 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 966 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 967 { 968 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 969 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 970 int m = A->m,*ip,N,*ailen = a->ilen; 971 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 972 MatScalar *aa = a->a,*ap; 973 #if defined(PETSC_HAVE_DSCPACK) 974 PetscTruth flag; 975 #endif 976 977 PetscFunctionBegin; 978 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 979 980 if (m) rmax = ailen[0]; 981 for (i=1; i<mbs; i++) { 982 /* move each row back by the amount of empty slots (fshift) before it*/ 983 fshift += imax[i-1] - ailen[i-1]; 984 rmax = PetscMax(rmax,ailen[i]); 985 if (fshift) { 986 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 987 N = ailen[i]; 988 for (j=0; j<N; j++) { 989 ip[j-fshift] = ip[j]; 990 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 991 } 992 } 993 ai[i] = ai[i-1] + ailen[i-1]; 994 } 995 if (mbs) { 996 fshift += imax[mbs-1] - ailen[mbs-1]; 997 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 998 } 999 /* reset ilen and imax for each row */ 1000 for (i=0; i<mbs; i++) { 1001 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1002 } 1003 a->nz = ai[mbs]; 1004 1005 /* diagonals may have moved, so kill the diagonal pointers */ 1006 if (fshift && a->diag) { 1007 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1008 PetscLogObjectMemory(A,-(mbs+1)*sizeof(int)); 1009 a->diag = 0; 1010 } 1011 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2); 1012 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs); 1013 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 1014 a->reallocs = 0; 1015 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1016 1017 #if defined(PETSC_HAVE_DSCPACK) 1018 ierr = PetscOptionsHasName(A->prefix,"-mat_baij_dscpack",&flag);CHKERRQ(ierr); 1019 if (flag) { ierr = MatUseDSCPACK_MPIBAIJ(A);CHKERRQ(ierr); } 1020 #endif 1021 1022 PetscFunctionReturn(0); 1023 } 1024 1025 1026 1027 /* 1028 This function returns an array of flags which indicate the locations of contiguous 1029 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1030 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1031 Assume: sizes should be long enough to hold all the values. 1032 */ 1033 #undef __FUNCT__ 1034 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1035 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 1036 { 1037 int i,j,k,row; 1038 PetscTruth flg; 1039 1040 PetscFunctionBegin; 1041 for (i=0,j=0; i<n; j++) { 1042 row = idx[i]; 1043 if (row%bs!=0) { /* Not the begining of a block */ 1044 sizes[j] = 1; 1045 i++; 1046 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1047 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1048 i++; 1049 } else { /* Begining of the block, so check if the complete block exists */ 1050 flg = PETSC_TRUE; 1051 for (k=1; k<bs; k++) { 1052 if (row+k != idx[i+k]) { /* break in the block */ 1053 flg = PETSC_FALSE; 1054 break; 1055 } 1056 } 1057 if (flg == PETSC_TRUE) { /* No break in the bs */ 1058 sizes[j] = bs; 1059 i+= bs; 1060 } else { 1061 sizes[j] = 1; 1062 i++; 1063 } 1064 } 1065 } 1066 *bs_max = j; 1067 PetscFunctionReturn(0); 1068 } 1069 1070 #undef __FUNCT__ 1071 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1072 int MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag) 1073 { 1074 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1075 int ierr,i,j,k,count,is_n,*is_idx,*rows; 1076 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 1077 PetscScalar zero = 0.0; 1078 MatScalar *aa; 1079 1080 PetscFunctionBegin; 1081 /* Make a copy of the IS and sort it */ 1082 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 1083 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1084 1085 /* allocate memory for rows,sizes */ 1086 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 1087 sizes = rows + is_n; 1088 1089 /* copy IS values to rows, and sort them */ 1090 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1091 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1092 if (baij->keepzeroedrows) { 1093 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1094 bs_max = is_n; 1095 } else { 1096 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1097 } 1098 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 1099 1100 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1101 row = rows[j]; 1102 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 1103 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1104 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1105 if (sizes[i] == bs && !baij->keepzeroedrows) { 1106 if (diag) { 1107 if (baij->ilen[row/bs] > 0) { 1108 baij->ilen[row/bs] = 1; 1109 baij->j[baij->i[row/bs]] = row/bs; 1110 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1111 } 1112 /* Now insert all the diagonal values for this bs */ 1113 for (k=0; k<bs; k++) { 1114 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 1115 } 1116 } else { /* (!diag) */ 1117 baij->ilen[row/bs] = 0; 1118 } /* end (!diag) */ 1119 } else { /* (sizes[i] != bs) */ 1120 #if defined (PETSC_USE_DEBUG) 1121 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 1122 #endif 1123 for (k=0; k<count; k++) { 1124 aa[0] = zero; 1125 aa += bs; 1126 } 1127 if (diag) { 1128 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1129 } 1130 } 1131 } 1132 1133 ierr = PetscFree(rows);CHKERRQ(ierr); 1134 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1135 PetscFunctionReturn(0); 1136 } 1137 1138 #undef __FUNCT__ 1139 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1140 int MatSetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 1141 { 1142 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1143 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1144 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1145 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1146 int ridx,cidx,bs2=a->bs2,ierr; 1147 PetscTruth roworiented=a->roworiented; 1148 MatScalar *ap,value,*aa=a->a,*bap; 1149 1150 PetscFunctionBegin; 1151 for (k=0; k<m; k++) { /* loop over added rows */ 1152 row = im[k]; brow = row/bs; 1153 if (row < 0) continue; 1154 #if defined(PETSC_USE_BOPT_g) 1155 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1); 1156 #endif 1157 rp = aj + ai[brow]; 1158 ap = aa + bs2*ai[brow]; 1159 rmax = imax[brow]; 1160 nrow = ailen[brow]; 1161 low = 0; 1162 for (l=0; l<n; l++) { /* loop over added columns */ 1163 if (in[l] < 0) continue; 1164 #if defined(PETSC_USE_BOPT_g) 1165 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1); 1166 #endif 1167 col = in[l]; bcol = col/bs; 1168 ridx = row % bs; cidx = col % bs; 1169 if (roworiented) { 1170 value = v[l + k*n]; 1171 } else { 1172 value = v[k + l*m]; 1173 } 1174 if (!sorted) low = 0; high = nrow; 1175 while (high-low > 7) { 1176 t = (low+high)/2; 1177 if (rp[t] > bcol) high = t; 1178 else low = t; 1179 } 1180 for (i=low; i<high; i++) { 1181 if (rp[i] > bcol) break; 1182 if (rp[i] == bcol) { 1183 bap = ap + bs2*i + bs*cidx + ridx; 1184 if (is == ADD_VALUES) *bap += value; 1185 else *bap = value; 1186 goto noinsert1; 1187 } 1188 } 1189 if (nonew == 1) goto noinsert1; 1190 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1191 if (nrow >= rmax) { 1192 /* there is no extra room in row, therefore enlarge */ 1193 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1194 MatScalar *new_a; 1195 1196 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1197 1198 /* Malloc new storage space */ 1199 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1200 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1201 new_j = (int*)(new_a + bs2*new_nz); 1202 new_i = new_j + new_nz; 1203 1204 /* copy over old data into new slots */ 1205 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1206 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1207 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1208 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1209 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1210 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1211 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1212 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1213 /* free up old matrix storage */ 1214 ierr = PetscFree(a->a);CHKERRQ(ierr); 1215 if (!a->singlemalloc) { 1216 ierr = PetscFree(a->i);CHKERRQ(ierr); 1217 ierr = PetscFree(a->j);CHKERRQ(ierr); 1218 } 1219 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1220 a->singlemalloc = PETSC_TRUE; 1221 1222 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1223 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1224 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1225 a->maxnz += bs2*CHUNKSIZE; 1226 a->reallocs++; 1227 a->nz++; 1228 } 1229 N = nrow++ - 1; 1230 /* shift up all the later entries in this row */ 1231 for (ii=N; ii>=i; ii--) { 1232 rp[ii+1] = rp[ii]; 1233 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1234 } 1235 if (N>=i) { 1236 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1237 } 1238 rp[i] = bcol; 1239 ap[bs2*i + bs*cidx + ridx] = value; 1240 noinsert1:; 1241 low = i; 1242 } 1243 ailen[brow] = nrow; 1244 } 1245 PetscFunctionReturn(0); 1246 } 1247 1248 1249 #undef __FUNCT__ 1250 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1251 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1252 { 1253 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1254 Mat outA; 1255 int ierr; 1256 PetscTruth row_identity,col_identity; 1257 1258 PetscFunctionBegin; 1259 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1260 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1261 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1262 if (!row_identity || !col_identity) { 1263 SETERRQ(1,"Row and column permutations must be identity for in-place ILU"); 1264 } 1265 1266 outA = inA; 1267 inA->factor = FACTOR_LU; 1268 1269 if (!a->diag) { 1270 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1271 } 1272 1273 a->row = row; 1274 a->col = col; 1275 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1276 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1277 1278 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1279 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1280 PetscLogObjectParent(inA,a->icol); 1281 1282 /* 1283 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1284 for ILU(0) factorization with natural ordering 1285 */ 1286 if (a->bs < 8) { 1287 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1288 } else { 1289 if (!a->solve_work) { 1290 ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1291 PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar)); 1292 } 1293 } 1294 1295 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1296 1297 PetscFunctionReturn(0); 1298 } 1299 #undef __FUNCT__ 1300 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1301 int MatPrintHelp_SeqBAIJ(Mat A) 1302 { 1303 static PetscTruth called = PETSC_FALSE; 1304 MPI_Comm comm = A->comm; 1305 int ierr; 1306 1307 PetscFunctionBegin; 1308 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1309 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1310 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1311 PetscFunctionReturn(0); 1312 } 1313 1314 EXTERN_C_BEGIN 1315 #undef __FUNCT__ 1316 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1317 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1318 { 1319 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1320 int i,nz,nbs; 1321 1322 PetscFunctionBegin; 1323 nz = baij->maxnz/baij->bs2; 1324 nbs = baij->nbs; 1325 for (i=0; i<nz; i++) { 1326 baij->j[i] = indices[i]; 1327 } 1328 baij->nz = nz; 1329 for (i=0; i<nbs; i++) { 1330 baij->ilen[i] = baij->imax[i]; 1331 } 1332 1333 PetscFunctionReturn(0); 1334 } 1335 EXTERN_C_END 1336 1337 #undef __FUNCT__ 1338 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1339 /*@ 1340 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1341 in the matrix. 1342 1343 Input Parameters: 1344 + mat - the SeqBAIJ matrix 1345 - indices - the column indices 1346 1347 Level: advanced 1348 1349 Notes: 1350 This can be called if you have precomputed the nonzero structure of the 1351 matrix and want to provide it to the matrix object to improve the performance 1352 of the MatSetValues() operation. 1353 1354 You MUST have set the correct numbers of nonzeros per row in the call to 1355 MatCreateSeqBAIJ(). 1356 1357 MUST be called before any calls to MatSetValues(); 1358 1359 @*/ 1360 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1361 { 1362 int ierr,(*f)(Mat,int *); 1363 1364 PetscFunctionBegin; 1365 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1366 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1367 if (f) { 1368 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1369 } else { 1370 SETERRQ(1,"Wrong type of matrix to set column indices"); 1371 } 1372 PetscFunctionReturn(0); 1373 } 1374 1375 #undef __FUNCT__ 1376 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1377 int MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1378 { 1379 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1380 int ierr,i,j,n,row,bs,*ai,*aj,mbs; 1381 PetscReal atmp; 1382 PetscScalar *x,zero = 0.0; 1383 MatScalar *aa; 1384 int ncols,brow,krow,kcol; 1385 1386 PetscFunctionBegin; 1387 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1388 bs = a->bs; 1389 aa = a->a; 1390 ai = a->i; 1391 aj = a->j; 1392 mbs = a->mbs; 1393 1394 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1395 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1396 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1397 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1398 for (i=0; i<mbs; i++) { 1399 ncols = ai[1] - ai[0]; ai++; 1400 brow = bs*i; 1401 for (j=0; j<ncols; j++){ 1402 /* bcol = bs*(*aj); */ 1403 for (kcol=0; kcol<bs; kcol++){ 1404 for (krow=0; krow<bs; krow++){ 1405 atmp = PetscAbsScalar(*aa); aa++; 1406 row = brow + krow; /* row index */ 1407 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1408 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1409 } 1410 } 1411 aj++; 1412 } 1413 } 1414 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1415 PetscFunctionReturn(0); 1416 } 1417 1418 #undef __FUNCT__ 1419 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1420 int MatSetUpPreallocation_SeqBAIJ(Mat A) 1421 { 1422 int ierr; 1423 1424 PetscFunctionBegin; 1425 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1426 PetscFunctionReturn(0); 1427 } 1428 1429 #undef __FUNCT__ 1430 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1431 int MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1432 { 1433 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1434 PetscFunctionBegin; 1435 *array = a->a; 1436 PetscFunctionReturn(0); 1437 } 1438 1439 #undef __FUNCT__ 1440 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1441 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1442 { 1443 PetscFunctionBegin; 1444 PetscFunctionReturn(0); 1445 } 1446 1447 #include "petscblaslapack.h" 1448 #undef __FUNCT__ 1449 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1450 int MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1451 { 1452 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1453 int ierr,one=1,i,bs=y->bs,j,bs2; 1454 1455 PetscFunctionBegin; 1456 if (str == SAME_NONZERO_PATTERN) { 1457 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1458 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1459 if (y->xtoy && y->XtoY != X) { 1460 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1461 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1462 } 1463 if (!y->xtoy) { /* get xtoy */ 1464 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1465 y->XtoY = X; 1466 } 1467 bs2 = bs*bs; 1468 for (i=0; i<x->nz; i++) { 1469 j = 0; 1470 while (j < bs2){ 1471 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1472 j++; 1473 } 1474 } 1475 PetscLogInfo(0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)); 1476 } else { 1477 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1478 } 1479 PetscFunctionReturn(0); 1480 } 1481 1482 /* -------------------------------------------------------------------*/ 1483 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1484 MatGetRow_SeqBAIJ, 1485 MatRestoreRow_SeqBAIJ, 1486 MatMult_SeqBAIJ_N, 1487 MatMultAdd_SeqBAIJ_N, 1488 MatMultTranspose_SeqBAIJ, 1489 MatMultTransposeAdd_SeqBAIJ, 1490 MatSolve_SeqBAIJ_N, 1491 0, 1492 0, 1493 0, 1494 MatLUFactor_SeqBAIJ, 1495 0, 1496 0, 1497 MatTranspose_SeqBAIJ, 1498 MatGetInfo_SeqBAIJ, 1499 MatEqual_SeqBAIJ, 1500 MatGetDiagonal_SeqBAIJ, 1501 MatDiagonalScale_SeqBAIJ, 1502 MatNorm_SeqBAIJ, 1503 0, 1504 MatAssemblyEnd_SeqBAIJ, 1505 0, 1506 MatSetOption_SeqBAIJ, 1507 MatZeroEntries_SeqBAIJ, 1508 MatZeroRows_SeqBAIJ, 1509 MatLUFactorSymbolic_SeqBAIJ, 1510 MatLUFactorNumeric_SeqBAIJ_N, 1511 0, 1512 0, 1513 MatSetUpPreallocation_SeqBAIJ, 1514 MatILUFactorSymbolic_SeqBAIJ, 1515 0, 1516 MatGetArray_SeqBAIJ, 1517 MatRestoreArray_SeqBAIJ, 1518 MatDuplicate_SeqBAIJ, 1519 0, 1520 0, 1521 MatILUFactor_SeqBAIJ, 1522 0, 1523 MatAXPY_SeqBAIJ, 1524 MatGetSubMatrices_SeqBAIJ, 1525 MatIncreaseOverlap_SeqBAIJ, 1526 MatGetValues_SeqBAIJ, 1527 0, 1528 MatPrintHelp_SeqBAIJ, 1529 MatScale_SeqBAIJ, 1530 0, 1531 0, 1532 0, 1533 MatGetBlockSize_SeqBAIJ, 1534 MatGetRowIJ_SeqBAIJ, 1535 MatRestoreRowIJ_SeqBAIJ, 1536 0, 1537 0, 1538 0, 1539 0, 1540 0, 1541 0, 1542 MatSetValuesBlocked_SeqBAIJ, 1543 MatGetSubMatrix_SeqBAIJ, 1544 MatDestroy_SeqBAIJ, 1545 MatView_SeqBAIJ, 1546 MatGetPetscMaps_Petsc, 1547 0, 1548 0, 1549 0, 1550 0, 1551 0, 1552 0, 1553 MatGetRowMax_SeqBAIJ, 1554 MatConvert_Basic}; 1555 1556 EXTERN_C_BEGIN 1557 #undef __FUNCT__ 1558 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 1559 int MatStoreValues_SeqBAIJ(Mat mat) 1560 { 1561 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1562 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1563 int ierr; 1564 1565 PetscFunctionBegin; 1566 if (aij->nonew != 1) { 1567 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1568 } 1569 1570 /* allocate space for values if not already there */ 1571 if (!aij->saved_values) { 1572 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1573 } 1574 1575 /* copy values over */ 1576 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1577 PetscFunctionReturn(0); 1578 } 1579 EXTERN_C_END 1580 1581 EXTERN_C_BEGIN 1582 #undef __FUNCT__ 1583 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 1584 int MatRetrieveValues_SeqBAIJ(Mat mat) 1585 { 1586 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1587 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1588 1589 PetscFunctionBegin; 1590 if (aij->nonew != 1) { 1591 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1592 } 1593 if (!aij->saved_values) { 1594 SETERRQ(1,"Must call MatStoreValues(A);first"); 1595 } 1596 1597 /* copy values over */ 1598 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1599 PetscFunctionReturn(0); 1600 } 1601 EXTERN_C_END 1602 1603 EXTERN_C_BEGIN 1604 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*); 1605 EXTERN_C_END 1606 1607 EXTERN_C_BEGIN 1608 #undef __FUNCT__ 1609 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 1610 int MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,int bs,int nz,int *nnz) 1611 { 1612 Mat_SeqBAIJ *b; 1613 int i,len,ierr,mbs,nbs,bs2,newbs = bs; 1614 PetscTruth flg; 1615 1616 PetscFunctionBegin; 1617 1618 B->preallocated = PETSC_TRUE; 1619 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 1620 if (nnz && newbs != bs) { 1621 SETERRQ(1,"Cannot change blocksize from command line if setting nnz"); 1622 } 1623 bs = newbs; 1624 1625 mbs = B->m/bs; 1626 nbs = B->n/bs; 1627 bs2 = bs*bs; 1628 1629 if (mbs*bs!=B->m || nbs*bs!=B->n) { 1630 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 1631 } 1632 1633 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1634 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1635 if (nnz) { 1636 for (i=0; i<mbs; i++) { 1637 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1638 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 1639 } 1640 } 1641 1642 b = (Mat_SeqBAIJ*)B->data; 1643 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1644 B->ops->solve = MatSolve_SeqBAIJ_Update; 1645 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 1646 if (!flg) { 1647 switch (bs) { 1648 case 1: 1649 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1650 B->ops->mult = MatMult_SeqBAIJ_1; 1651 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 1652 break; 1653 case 2: 1654 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1655 B->ops->mult = MatMult_SeqBAIJ_2; 1656 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 1657 break; 1658 case 3: 1659 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1660 B->ops->mult = MatMult_SeqBAIJ_3; 1661 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 1662 break; 1663 case 4: 1664 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1665 B->ops->mult = MatMult_SeqBAIJ_4; 1666 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 1667 break; 1668 case 5: 1669 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1670 B->ops->mult = MatMult_SeqBAIJ_5; 1671 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 1672 break; 1673 case 6: 1674 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 1675 B->ops->mult = MatMult_SeqBAIJ_6; 1676 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 1677 break; 1678 case 7: 1679 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 1680 B->ops->mult = MatMult_SeqBAIJ_7; 1681 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 1682 break; 1683 default: 1684 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 1685 B->ops->mult = MatMult_SeqBAIJ_N; 1686 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 1687 break; 1688 } 1689 } 1690 b->bs = bs; 1691 b->mbs = mbs; 1692 b->nbs = nbs; 1693 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1694 if (!nnz) { 1695 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1696 else if (nz <= 0) nz = 1; 1697 for (i=0; i<mbs; i++) b->imax[i] = nz; 1698 nz = nz*mbs; 1699 } else { 1700 nz = 0; 1701 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1702 } 1703 1704 /* allocate the matrix space */ 1705 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1706 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1707 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1708 b->j = (int*)(b->a + nz*bs2); 1709 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 1710 b->i = b->j + nz; 1711 b->singlemalloc = PETSC_TRUE; 1712 1713 b->i[0] = 0; 1714 for (i=1; i<mbs+1; i++) { 1715 b->i[i] = b->i[i-1] + b->imax[i-1]; 1716 } 1717 1718 /* b->ilen will count nonzeros in each block row so far. */ 1719 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1720 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1721 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1722 1723 b->bs = bs; 1724 b->bs2 = bs2; 1725 b->mbs = mbs; 1726 b->nz = 0; 1727 b->maxnz = nz*bs2; 1728 B->info.nz_unneeded = (PetscReal)b->maxnz; 1729 PetscFunctionReturn(0); 1730 } 1731 EXTERN_C_END 1732 1733 EXTERN_C_BEGIN 1734 #undef __FUNCT__ 1735 #define __FUNCT__ "MatCreate_SeqBAIJ" 1736 int MatCreate_SeqBAIJ(Mat B) 1737 { 1738 int ierr,size; 1739 Mat_SeqBAIJ *b; 1740 1741 PetscFunctionBegin; 1742 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1743 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1744 1745 B->m = B->M = PetscMax(B->m,B->M); 1746 B->n = B->N = PetscMax(B->n,B->N); 1747 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 1748 B->data = (void*)b; 1749 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1750 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1751 B->factor = 0; 1752 B->lupivotthreshold = 1.0; 1753 B->mapping = 0; 1754 b->row = 0; 1755 b->col = 0; 1756 b->icol = 0; 1757 b->reallocs = 0; 1758 b->saved_values = 0; 1759 #if defined(PETSC_USE_MAT_SINGLE) 1760 b->setvalueslen = 0; 1761 b->setvaluescopy = PETSC_NULL; 1762 #endif 1763 1764 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 1765 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 1766 1767 b->sorted = PETSC_FALSE; 1768 b->roworiented = PETSC_TRUE; 1769 b->nonew = 0; 1770 b->diag = 0; 1771 b->solve_work = 0; 1772 b->mult_work = 0; 1773 B->spptr = 0; 1774 B->info.nz_unneeded = (PetscReal)b->maxnz; 1775 b->keepzeroedrows = PETSC_FALSE; 1776 b->xtoy = 0; 1777 b->XtoY = 0; 1778 1779 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1780 "MatStoreValues_SeqBAIJ", 1781 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1782 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1783 "MatRetrieveValues_SeqBAIJ", 1784 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1785 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1786 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1787 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1788 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 1789 "MatConvert_SeqBAIJ_SeqAIJ", 1790 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 1791 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 1792 "MatSeqBAIJSetPreallocation_SeqBAIJ", 1793 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 1794 PetscFunctionReturn(0); 1795 } 1796 EXTERN_C_END 1797 1798 #undef __FUNCT__ 1799 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 1800 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1801 { 1802 Mat C; 1803 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 1804 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1805 1806 PetscFunctionBegin; 1807 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1808 1809 *B = 0; 1810 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1811 ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr); 1812 c = (Mat_SeqBAIJ*)C->data; 1813 1814 c->bs = a->bs; 1815 c->bs2 = a->bs2; 1816 c->mbs = a->mbs; 1817 c->nbs = a->nbs; 1818 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1819 1820 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1821 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1822 for (i=0; i<mbs; i++) { 1823 c->imax[i] = a->imax[i]; 1824 c->ilen[i] = a->ilen[i]; 1825 } 1826 1827 /* allocate the matrix space */ 1828 c->singlemalloc = PETSC_TRUE; 1829 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1830 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1831 c->j = (int*)(c->a + nz*bs2); 1832 c->i = c->j + nz; 1833 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1834 if (mbs > 0) { 1835 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1836 if (cpvalues == MAT_COPY_VALUES) { 1837 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1838 } else { 1839 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1840 } 1841 } 1842 1843 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1844 c->sorted = a->sorted; 1845 c->roworiented = a->roworiented; 1846 c->nonew = a->nonew; 1847 1848 if (a->diag) { 1849 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1850 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1851 for (i=0; i<mbs; i++) { 1852 c->diag[i] = a->diag[i]; 1853 } 1854 } else c->diag = 0; 1855 c->nz = a->nz; 1856 c->maxnz = a->maxnz; 1857 c->solve_work = 0; 1858 C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1859 c->mult_work = 0; 1860 C->preallocated = PETSC_TRUE; 1861 C->assembled = PETSC_TRUE; 1862 *B = C; 1863 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1864 PetscFunctionReturn(0); 1865 } 1866 1867 EXTERN_C_BEGIN 1868 #undef __FUNCT__ 1869 #define __FUNCT__ "MatLoad_SeqBAIJ" 1870 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A) 1871 { 1872 Mat_SeqBAIJ *a; 1873 Mat B; 1874 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1875 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1876 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1877 int *masked,nmask,tmp,bs2,ishift; 1878 PetscScalar *aa; 1879 MPI_Comm comm = ((PetscObject)viewer)->comm; 1880 #if defined(PETSC_HAVE_DSCPACK) 1881 PetscTruth flag; 1882 #endif 1883 1884 PetscFunctionBegin; 1885 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1886 bs2 = bs*bs; 1887 1888 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1889 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1890 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1891 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1892 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1893 M = header[1]; N = header[2]; nz = header[3]; 1894 1895 if (header[3] < 0) { 1896 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 1897 } 1898 1899 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1900 1901 /* 1902 This code adds extra rows to make sure the number of rows is 1903 divisible by the blocksize 1904 */ 1905 mbs = M/bs; 1906 extra_rows = bs - M + bs*(mbs); 1907 if (extra_rows == bs) extra_rows = 0; 1908 else mbs++; 1909 if (extra_rows) { 1910 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1911 } 1912 1913 /* read in row lengths */ 1914 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1915 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1916 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1917 1918 /* read in column indices */ 1919 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1920 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1921 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1922 1923 /* loop over row lengths determining block row lengths */ 1924 ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 1925 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1926 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1927 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1928 masked = mask + mbs; 1929 rowcount = 0; nzcount = 0; 1930 for (i=0; i<mbs; i++) { 1931 nmask = 0; 1932 for (j=0; j<bs; j++) { 1933 kmax = rowlengths[rowcount]; 1934 for (k=0; k<kmax; k++) { 1935 tmp = jj[nzcount++]/bs; 1936 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1937 } 1938 rowcount++; 1939 } 1940 browlengths[i] += nmask; 1941 /* zero out the mask elements we set */ 1942 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1943 } 1944 1945 /* create our matrix */ 1946 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1947 B = *A; 1948 a = (Mat_SeqBAIJ*)B->data; 1949 1950 /* set matrix "i" values */ 1951 a->i[0] = 0; 1952 for (i=1; i<= mbs; i++) { 1953 a->i[i] = a->i[i-1] + browlengths[i-1]; 1954 a->ilen[i-1] = browlengths[i-1]; 1955 } 1956 a->nz = 0; 1957 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 1958 1959 /* read in nonzero values */ 1960 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1961 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1962 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1963 1964 /* set "a" and "j" values into matrix */ 1965 nzcount = 0; jcount = 0; 1966 for (i=0; i<mbs; i++) { 1967 nzcountb = nzcount; 1968 nmask = 0; 1969 for (j=0; j<bs; j++) { 1970 kmax = rowlengths[i*bs+j]; 1971 for (k=0; k<kmax; k++) { 1972 tmp = jj[nzcount++]/bs; 1973 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1974 } 1975 } 1976 /* sort the masked values */ 1977 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1978 1979 /* set "j" values into matrix */ 1980 maskcount = 1; 1981 for (j=0; j<nmask; j++) { 1982 a->j[jcount++] = masked[j]; 1983 mask[masked[j]] = maskcount++; 1984 } 1985 /* set "a" values into matrix */ 1986 ishift = bs2*a->i[i]; 1987 for (j=0; j<bs; j++) { 1988 kmax = rowlengths[i*bs+j]; 1989 for (k=0; k<kmax; k++) { 1990 tmp = jj[nzcountb]/bs ; 1991 block = mask[tmp] - 1; 1992 point = jj[nzcountb] - bs*tmp; 1993 idx = ishift + bs2*block + j + bs*point; 1994 a->a[idx] = (MatScalar)aa[nzcountb++]; 1995 } 1996 } 1997 /* zero out the mask elements we set */ 1998 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1999 } 2000 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2001 2002 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2003 ierr = PetscFree(browlengths);CHKERRQ(ierr); 2004 ierr = PetscFree(aa);CHKERRQ(ierr); 2005 ierr = PetscFree(jj);CHKERRQ(ierr); 2006 ierr = PetscFree(mask);CHKERRQ(ierr); 2007 2008 B->assembled = PETSC_TRUE; 2009 2010 #if defined(PETSC_HAVE_DSCPACK) 2011 ierr = PetscOptionsHasName(B->prefix,"-mat_baij_dscpack",&flag);CHKERRQ(ierr); 2012 if (flag) { ierr = MatUseDSCPACK_MPIBAIJ(B);CHKERRQ(ierr); } 2013 #endif 2014 2015 ierr = MatView_Private(B);CHKERRQ(ierr); 2016 PetscFunctionReturn(0); 2017 } 2018 EXTERN_C_END 2019 2020 #undef __FUNCT__ 2021 #define __FUNCT__ "MatCreateSeqBAIJ" 2022 /*@C 2023 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2024 compressed row) format. For good matrix assembly performance the 2025 user should preallocate the matrix storage by setting the parameter nz 2026 (or the array nnz). By setting these parameters accurately, performance 2027 during matrix assembly can be increased by more than a factor of 50. 2028 2029 Collective on MPI_Comm 2030 2031 Input Parameters: 2032 + comm - MPI communicator, set to PETSC_COMM_SELF 2033 . bs - size of block 2034 . m - number of rows 2035 . n - number of columns 2036 . nz - number of nonzero blocks per block row (same for all rows) 2037 - nnz - array containing the number of nonzero blocks in the various block rows 2038 (possibly different for each block row) or PETSC_NULL 2039 2040 Output Parameter: 2041 . A - the matrix 2042 2043 Options Database Keys: 2044 . -mat_no_unroll - uses code that does not unroll the loops in the 2045 block calculations (much slower) 2046 . -mat_block_size - size of the blocks to use 2047 2048 Level: intermediate 2049 2050 Notes: 2051 A nonzero block is any block that as 1 or more nonzeros in it 2052 2053 The block AIJ format is fully compatible with standard Fortran 77 2054 storage. That is, the stored row and column indices can begin at 2055 either one (as in Fortran) or zero. See the users' manual for details. 2056 2057 Specify the preallocated storage with either nz or nnz (not both). 2058 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2059 allocation. For additional details, see the users manual chapter on 2060 matrices. 2061 2062 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2063 @*/ 2064 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A) 2065 { 2066 int ierr; 2067 2068 PetscFunctionBegin; 2069 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 2070 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2071 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 2072 PetscFunctionReturn(0); 2073 } 2074 2075 #undef __FUNCT__ 2076 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2077 /*@C 2078 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2079 per row in the matrix. For good matrix assembly performance the 2080 user should preallocate the matrix storage by setting the parameter nz 2081 (or the array nnz). By setting these parameters accurately, performance 2082 during matrix assembly can be increased by more than a factor of 50. 2083 2084 Collective on MPI_Comm 2085 2086 Input Parameters: 2087 + A - the matrix 2088 . bs - size of block 2089 . nz - number of block nonzeros per block row (same for all rows) 2090 - nnz - array containing the number of block nonzeros in the various block rows 2091 (possibly different for each block row) or PETSC_NULL 2092 2093 Options Database Keys: 2094 . -mat_no_unroll - uses code that does not unroll the loops in the 2095 block calculations (much slower) 2096 . -mat_block_size - size of the blocks to use 2097 2098 Level: intermediate 2099 2100 Notes: 2101 The block AIJ format is fully compatible with standard Fortran 77 2102 storage. That is, the stored row and column indices can begin at 2103 either one (as in Fortran) or zero. See the users' manual for details. 2104 2105 Specify the preallocated storage with either nz or nnz (not both). 2106 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2107 allocation. For additional details, see the users manual chapter on 2108 matrices. 2109 2110 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2111 @*/ 2112 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) 2113 { 2114 int ierr,(*f)(Mat,int,int,const int[]); 2115 2116 PetscFunctionBegin; 2117 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2118 if (f) { 2119 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2120 } 2121 PetscFunctionReturn(0); 2122 } 2123