1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 #include "petscsys.h" /*I "petscmat.h" I*/ 11 12 /* 13 Special version for Fun3d sequential benchmark 14 */ 15 #if defined(PETSC_HAVE_FORTRAN_CAPS) 16 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 17 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 18 #define matsetvaluesblocked4_ matsetvaluesblocked4 19 #endif 20 21 EXTERN_C_BEGIN 22 #undef __FUNCT__ 23 #define __FUNCT__ "matsetvaluesblocked4_" 24 void matsetvaluesblocked4_(Mat *AA,int *mm,const int im[],int *nn,const int in[],const PetscScalar v[]) 25 { 26 Mat A = *AA; 27 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 28 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 29 int *ai=a->i,*ailen=a->ilen; 30 int *aj=a->j,stepval; 31 const PetscScalar *value = v; 32 MatScalar *ap,*aa = a->a,*bap; 33 34 PetscFunctionBegin; 35 stepval = (n-1)*4; 36 for (k=0; k<m; k++) { /* loop over added rows */ 37 row = im[k]; 38 rp = aj + ai[row]; 39 ap = aa + 16*ai[row]; 40 nrow = ailen[row]; 41 low = 0; 42 for (l=0; l<n; l++) { /* loop over added columns */ 43 col = in[l]; 44 value = v + k*(stepval+4)*4 + l*4; 45 low = 0; high = nrow; 46 while (high-low > 7) { 47 t = (low+high)/2; 48 if (rp[t] > col) high = t; 49 else low = t; 50 } 51 for (i=low; i<high; i++) { 52 if (rp[i] > col) break; 53 if (rp[i] == col) { 54 bap = ap + 16*i; 55 for (ii=0; ii<4; ii++,value+=stepval) { 56 for (jj=ii; jj<16; jj+=4) { 57 bap[jj] += *value++; 58 } 59 } 60 goto noinsert2; 61 } 62 } 63 N = nrow++ - 1; 64 /* shift up all the later entries in this row */ 65 for (ii=N; ii>=i; ii--) { 66 rp[ii+1] = rp[ii]; 67 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 68 } 69 if (N >= i) { 70 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 71 } 72 rp[i] = col; 73 bap = ap + 16*i; 74 for (ii=0; ii<4; ii++,value+=stepval) { 75 for (jj=ii; jj<16; jj+=4) { 76 bap[jj] = *value++; 77 } 78 } 79 noinsert2:; 80 low = i; 81 } 82 ailen[row] = nrow; 83 } 84 } 85 EXTERN_C_END 86 87 #if defined(PETSC_HAVE_FORTRAN_CAPS) 88 #define matsetvalues4_ MATSETVALUES4 89 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 90 #define matsetvalues4_ matsetvalues4 91 #endif 92 93 EXTERN_C_BEGIN 94 #undef __FUNCT__ 95 #define __FUNCT__ "MatSetValues4_" 96 void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v) 97 { 98 Mat A = *AA; 99 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 100 int *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 101 int *ai=a->i,*ailen=a->ilen; 102 int *aj=a->j,brow,bcol; 103 int ridx,cidx; 104 MatScalar *ap,value,*aa=a->a,*bap; 105 106 PetscFunctionBegin; 107 for (k=0; k<m; k++) { /* loop over added rows */ 108 row = im[k]; brow = row/4; 109 rp = aj + ai[brow]; 110 ap = aa + 16*ai[brow]; 111 nrow = ailen[brow]; 112 low = 0; 113 for (l=0; l<n; l++) { /* loop over added columns */ 114 col = in[l]; bcol = col/4; 115 ridx = row % 4; cidx = col % 4; 116 value = v[l + k*n]; 117 low = 0; high = nrow; 118 while (high-low > 7) { 119 t = (low+high)/2; 120 if (rp[t] > bcol) high = t; 121 else low = t; 122 } 123 for (i=low; i<high; i++) { 124 if (rp[i] > bcol) break; 125 if (rp[i] == bcol) { 126 bap = ap + 16*i + 4*cidx + ridx; 127 *bap += value; 128 goto noinsert1; 129 } 130 } 131 N = nrow++ - 1; 132 /* shift up all the later entries in this row */ 133 for (ii=N; ii>=i; ii--) { 134 rp[ii+1] = rp[ii]; 135 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 136 } 137 if (N>=i) { 138 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 139 } 140 rp[i] = bcol; 141 ap[16*i + 4*cidx + ridx] = value; 142 noinsert1:; 143 low = i; 144 } 145 ailen[brow] = nrow; 146 } 147 } 148 EXTERN_C_END 149 150 /* UGLY, ugly, ugly 151 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 152 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 153 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 154 converts the entries into single precision and then calls ..._MatScalar() to put them 155 into the single precision data structures. 156 */ 157 #if defined(PETSC_USE_MAT_SINGLE) 158 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 159 #else 160 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 161 #endif 162 163 #define CHUNKSIZE 10 164 165 /* 166 Checks for missing diagonals 167 */ 168 #undef __FUNCT__ 169 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 170 int MatMissingDiagonal_SeqBAIJ(Mat A) 171 { 172 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 173 int *diag,*jj = a->j,i,ierr; 174 175 PetscFunctionBegin; 176 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 177 diag = a->diag; 178 for (i=0; i<a->mbs; i++) { 179 if (jj[diag[i]] != i) { 180 SETERRQ1(1,"Matrix is missing diagonal number %d",i); 181 } 182 } 183 PetscFunctionReturn(0); 184 } 185 186 #undef __FUNCT__ 187 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 188 int MatMarkDiagonal_SeqBAIJ(Mat A) 189 { 190 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 191 int i,j,*diag,m = a->mbs,ierr; 192 193 PetscFunctionBegin; 194 if (a->diag) PetscFunctionReturn(0); 195 196 ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 197 PetscLogObjectMemory(A,(m+1)*sizeof(int)); 198 for (i=0; i<m; i++) { 199 diag[i] = a->i[i+1]; 200 for (j=a->i[i]; j<a->i[i+1]; j++) { 201 if (a->j[j] == i) { 202 diag[i] = j; 203 break; 204 } 205 } 206 } 207 a->diag = diag; 208 PetscFunctionReturn(0); 209 } 210 211 212 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 213 214 #undef __FUNCT__ 215 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 216 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 217 { 218 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 219 int ierr,n = a->mbs,i; 220 221 PetscFunctionBegin; 222 *nn = n; 223 if (!ia) PetscFunctionReturn(0); 224 if (symmetric) { 225 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 226 } else if (oshift == 1) { 227 /* temporarily add 1 to i and j indices */ 228 int nz = a->i[n]; 229 for (i=0; i<nz; i++) a->j[i]++; 230 for (i=0; i<n+1; i++) a->i[i]++; 231 *ia = a->i; *ja = a->j; 232 } else { 233 *ia = a->i; *ja = a->j; 234 } 235 236 PetscFunctionReturn(0); 237 } 238 239 #undef __FUNCT__ 240 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 241 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 242 { 243 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 244 int i,n = a->mbs,ierr; 245 246 PetscFunctionBegin; 247 if (!ia) PetscFunctionReturn(0); 248 if (symmetric) { 249 ierr = PetscFree(*ia);CHKERRQ(ierr); 250 ierr = PetscFree(*ja);CHKERRQ(ierr); 251 } else if (oshift == 1) { 252 int nz = a->i[n]-1; 253 for (i=0; i<nz; i++) a->j[i]--; 254 for (i=0; i<n+1; i++) a->i[i]--; 255 } 256 PetscFunctionReturn(0); 257 } 258 259 #undef __FUNCT__ 260 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 261 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 262 { 263 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 264 265 PetscFunctionBegin; 266 *bs = baij->bs; 267 PetscFunctionReturn(0); 268 } 269 270 271 #undef __FUNCT__ 272 #define __FUNCT__ "MatDestroy_SeqBAIJ" 273 int MatDestroy_SeqBAIJ(Mat A) 274 { 275 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 276 int ierr; 277 278 PetscFunctionBegin; 279 #if defined(PETSC_USE_LOG) 280 PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 281 #endif 282 ierr = PetscFree(a->a);CHKERRQ(ierr); 283 if (!a->singlemalloc) { 284 ierr = PetscFree(a->i);CHKERRQ(ierr); 285 ierr = PetscFree(a->j);CHKERRQ(ierr); 286 } 287 if (a->row) { 288 ierr = ISDestroy(a->row);CHKERRQ(ierr); 289 } 290 if (a->col) { 291 ierr = ISDestroy(a->col);CHKERRQ(ierr); 292 } 293 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 294 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 295 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 296 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 297 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 298 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 299 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 300 #if defined(PETSC_USE_MAT_SINGLE) 301 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 302 #endif 303 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 304 305 ierr = PetscFree(a);CHKERRQ(ierr); 306 PetscFunctionReturn(0); 307 } 308 309 #undef __FUNCT__ 310 #define __FUNCT__ "MatSetOption_SeqBAIJ" 311 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 312 { 313 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 314 315 PetscFunctionBegin; 316 switch (op) { 317 case MAT_ROW_ORIENTED: 318 a->roworiented = PETSC_TRUE; 319 break; 320 case MAT_COLUMN_ORIENTED: 321 a->roworiented = PETSC_FALSE; 322 break; 323 case MAT_COLUMNS_SORTED: 324 a->sorted = PETSC_TRUE; 325 break; 326 case MAT_COLUMNS_UNSORTED: 327 a->sorted = PETSC_FALSE; 328 break; 329 case MAT_KEEP_ZEROED_ROWS: 330 a->keepzeroedrows = PETSC_TRUE; 331 break; 332 case MAT_NO_NEW_NONZERO_LOCATIONS: 333 a->nonew = 1; 334 break; 335 case MAT_NEW_NONZERO_LOCATION_ERR: 336 a->nonew = -1; 337 break; 338 case MAT_NEW_NONZERO_ALLOCATION_ERR: 339 a->nonew = -2; 340 break; 341 case MAT_YES_NEW_NONZERO_LOCATIONS: 342 a->nonew = 0; 343 break; 344 case MAT_ROWS_SORTED: 345 case MAT_ROWS_UNSORTED: 346 case MAT_YES_NEW_DIAGONALS: 347 case MAT_IGNORE_OFF_PROC_ENTRIES: 348 case MAT_USE_HASH_TABLE: 349 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 350 break; 351 case MAT_NO_NEW_DIAGONALS: 352 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 353 default: 354 SETERRQ(PETSC_ERR_SUP,"unknown option"); 355 } 356 PetscFunctionReturn(0); 357 } 358 359 #undef __FUNCT__ 360 #define __FUNCT__ "MatGetRow_SeqBAIJ" 361 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 362 { 363 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 364 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 365 MatScalar *aa,*aa_i; 366 PetscScalar *v_i; 367 368 PetscFunctionBegin; 369 bs = a->bs; 370 ai = a->i; 371 aj = a->j; 372 aa = a->a; 373 bs2 = a->bs2; 374 375 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 376 377 bn = row/bs; /* Block number */ 378 bp = row % bs; /* Block Position */ 379 M = ai[bn+1] - ai[bn]; 380 *nz = bs*M; 381 382 if (v) { 383 *v = 0; 384 if (*nz) { 385 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 386 for (i=0; i<M; i++) { /* for each block in the block row */ 387 v_i = *v + i*bs; 388 aa_i = aa + bs2*(ai[bn] + i); 389 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 390 } 391 } 392 } 393 394 if (idx) { 395 *idx = 0; 396 if (*nz) { 397 ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 398 for (i=0; i<M; i++) { /* for each block in the block row */ 399 idx_i = *idx + i*bs; 400 itmp = bs*aj[ai[bn] + i]; 401 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 402 } 403 } 404 } 405 PetscFunctionReturn(0); 406 } 407 408 #undef __FUNCT__ 409 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 410 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 411 { 412 int ierr; 413 414 PetscFunctionBegin; 415 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 416 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 417 PetscFunctionReturn(0); 418 } 419 420 #undef __FUNCT__ 421 #define __FUNCT__ "MatTranspose_SeqBAIJ" 422 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 423 { 424 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 425 Mat C; 426 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 427 int *rows,*cols,bs2=a->bs2; 428 PetscScalar *array; 429 430 PetscFunctionBegin; 431 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 432 ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 433 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 434 435 #if defined(PETSC_USE_MAT_SINGLE) 436 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 437 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 438 #else 439 array = a->a; 440 #endif 441 442 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 443 ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 444 ierr = PetscFree(col);CHKERRQ(ierr); 445 ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 446 cols = rows + bs; 447 for (i=0; i<mbs; i++) { 448 cols[0] = i*bs; 449 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 450 len = ai[i+1] - ai[i]; 451 for (j=0; j<len; j++) { 452 rows[0] = (*aj++)*bs; 453 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 454 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 455 array += bs2; 456 } 457 } 458 ierr = PetscFree(rows);CHKERRQ(ierr); 459 #if defined(PETSC_USE_MAT_SINGLE) 460 ierr = PetscFree(array); 461 #endif 462 463 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 464 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 465 466 if (B) { 467 *B = C; 468 } else { 469 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 470 } 471 PetscFunctionReturn(0); 472 } 473 474 #undef __FUNCT__ 475 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 476 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 477 { 478 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 479 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 480 PetscScalar *aa; 481 FILE *file; 482 483 PetscFunctionBegin; 484 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 485 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 486 col_lens[0] = MAT_FILE_COOKIE; 487 488 col_lens[1] = A->m; 489 col_lens[2] = A->n; 490 col_lens[3] = a->nz*bs2; 491 492 /* store lengths of each row and write (including header) to file */ 493 count = 0; 494 for (i=0; i<a->mbs; i++) { 495 for (j=0; j<bs; j++) { 496 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 497 } 498 } 499 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 500 ierr = PetscFree(col_lens);CHKERRQ(ierr); 501 502 /* store column indices (zero start index) */ 503 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 504 count = 0; 505 for (i=0; i<a->mbs; i++) { 506 for (j=0; j<bs; j++) { 507 for (k=a->i[i]; k<a->i[i+1]; k++) { 508 for (l=0; l<bs; l++) { 509 jj[count++] = bs*a->j[k] + l; 510 } 511 } 512 } 513 } 514 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 515 ierr = PetscFree(jj);CHKERRQ(ierr); 516 517 /* store nonzero values */ 518 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 519 count = 0; 520 for (i=0; i<a->mbs; i++) { 521 for (j=0; j<bs; j++) { 522 for (k=a->i[i]; k<a->i[i+1]; k++) { 523 for (l=0; l<bs; l++) { 524 aa[count++] = a->a[bs2*k + l*bs + j]; 525 } 526 } 527 } 528 } 529 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 530 ierr = PetscFree(aa);CHKERRQ(ierr); 531 532 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 533 if (file) { 534 fprintf(file,"-matload_block_size %d\n",a->bs); 535 } 536 PetscFunctionReturn(0); 537 } 538 539 #undef __FUNCT__ 540 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 541 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 542 { 543 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 544 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 545 PetscViewerFormat format; 546 547 PetscFunctionBegin; 548 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 549 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 550 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 551 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 552 Mat aij; 553 ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr); 554 ierr = MatView(aij,viewer);CHKERRQ(ierr); 555 ierr = MatDestroy(aij);CHKERRQ(ierr); 556 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 557 PetscFunctionReturn(0); 558 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 559 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 560 for (i=0; i<a->mbs; i++) { 561 for (j=0; j<bs; j++) { 562 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 563 for (k=a->i[i]; k<a->i[i+1]; k++) { 564 for (l=0; l<bs; l++) { 565 #if defined(PETSC_USE_COMPLEX) 566 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 567 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l, 568 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 569 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 570 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l, 571 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 572 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 573 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 574 } 575 #else 576 if (a->a[bs2*k + l*bs + j] != 0.0) { 577 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 578 } 579 #endif 580 } 581 } 582 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 583 } 584 } 585 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 586 } else { 587 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 588 for (i=0; i<a->mbs; i++) { 589 for (j=0; j<bs; j++) { 590 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 591 for (k=a->i[i]; k<a->i[i+1]; k++) { 592 for (l=0; l<bs; l++) { 593 #if defined(PETSC_USE_COMPLEX) 594 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 595 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 596 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 597 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 598 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 599 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 600 } else { 601 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 602 } 603 #else 604 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 605 #endif 606 } 607 } 608 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 609 } 610 } 611 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 612 } 613 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 614 PetscFunctionReturn(0); 615 } 616 617 #undef __FUNCT__ 618 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 619 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 620 { 621 Mat A = (Mat) Aa; 622 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 623 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 624 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 625 MatScalar *aa; 626 PetscViewer viewer; 627 628 PetscFunctionBegin; 629 630 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 631 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 632 633 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 634 635 /* loop over matrix elements drawing boxes */ 636 color = PETSC_DRAW_BLUE; 637 for (i=0,row=0; i<mbs; i++,row+=bs) { 638 for (j=a->i[i]; j<a->i[i+1]; j++) { 639 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 640 x_l = a->j[j]*bs; x_r = x_l + 1.0; 641 aa = a->a + j*bs2; 642 for (k=0; k<bs; k++) { 643 for (l=0; l<bs; l++) { 644 if (PetscRealPart(*aa++) >= 0.) continue; 645 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 646 } 647 } 648 } 649 } 650 color = PETSC_DRAW_CYAN; 651 for (i=0,row=0; i<mbs; i++,row+=bs) { 652 for (j=a->i[i]; j<a->i[i+1]; j++) { 653 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 654 x_l = a->j[j]*bs; x_r = x_l + 1.0; 655 aa = a->a + j*bs2; 656 for (k=0; k<bs; k++) { 657 for (l=0; l<bs; l++) { 658 if (PetscRealPart(*aa++) != 0.) continue; 659 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 660 } 661 } 662 } 663 } 664 665 color = PETSC_DRAW_RED; 666 for (i=0,row=0; i<mbs; i++,row+=bs) { 667 for (j=a->i[i]; j<a->i[i+1]; j++) { 668 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 669 x_l = a->j[j]*bs; x_r = x_l + 1.0; 670 aa = a->a + j*bs2; 671 for (k=0; k<bs; k++) { 672 for (l=0; l<bs; l++) { 673 if (PetscRealPart(*aa++) <= 0.) continue; 674 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 675 } 676 } 677 } 678 } 679 PetscFunctionReturn(0); 680 } 681 682 #undef __FUNCT__ 683 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 684 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 685 { 686 int ierr; 687 PetscReal xl,yl,xr,yr,w,h; 688 PetscDraw draw; 689 PetscTruth isnull; 690 691 PetscFunctionBegin; 692 693 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 694 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 695 696 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 697 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 698 xr += w; yr += h; xl = -w; yl = -h; 699 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 700 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 701 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 702 PetscFunctionReturn(0); 703 } 704 705 #undef __FUNCT__ 706 #define __FUNCT__ "MatView_SeqBAIJ" 707 int MatView_SeqBAIJ(Mat A,PetscViewer viewer) 708 { 709 int ierr; 710 PetscTruth issocket,isascii,isbinary,isdraw; 711 712 PetscFunctionBegin; 713 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 714 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 715 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 716 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 717 if (issocket) { 718 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 719 } else if (isascii){ 720 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 721 } else if (isbinary) { 722 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 723 } else if (isdraw) { 724 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 725 } else { 726 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 727 } 728 PetscFunctionReturn(0); 729 } 730 731 732 #undef __FUNCT__ 733 #define __FUNCT__ "MatGetValues_SeqBAIJ" 734 int MatGetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[]) 735 { 736 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 737 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 738 int *ai = a->i,*ailen = a->ilen; 739 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 740 MatScalar *ap,*aa = a->a,zero = 0.0; 741 742 PetscFunctionBegin; 743 for (k=0; k<m; k++) { /* loop over rows */ 744 row = im[k]; brow = row/bs; 745 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 746 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 747 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 748 nrow = ailen[brow]; 749 for (l=0; l<n; l++) { /* loop over columns */ 750 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 751 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 752 col = in[l] ; 753 bcol = col/bs; 754 cidx = col%bs; 755 ridx = row%bs; 756 high = nrow; 757 low = 0; /* assume unsorted */ 758 while (high-low > 5) { 759 t = (low+high)/2; 760 if (rp[t] > bcol) high = t; 761 else low = t; 762 } 763 for (i=low; i<high; i++) { 764 if (rp[i] > bcol) break; 765 if (rp[i] == bcol) { 766 *v++ = ap[bs2*i+bs*cidx+ridx]; 767 goto finished; 768 } 769 } 770 *v++ = zero; 771 finished:; 772 } 773 } 774 PetscFunctionReturn(0); 775 } 776 777 #if defined(PETSC_USE_MAT_SINGLE) 778 #undef __FUNCT__ 779 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 780 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 781 { 782 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 783 int ierr,i,N = m*n*b->bs2; 784 MatScalar *vsingle; 785 786 PetscFunctionBegin; 787 if (N > b->setvalueslen) { 788 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 789 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 790 b->setvalueslen = N; 791 } 792 vsingle = b->setvaluescopy; 793 for (i=0; i<N; i++) { 794 vsingle[i] = v[i]; 795 } 796 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 797 PetscFunctionReturn(0); 798 } 799 #endif 800 801 802 #undef __FUNCT__ 803 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 804 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode is) 805 { 806 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 807 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 808 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 809 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 810 PetscTruth roworiented=a->roworiented; 811 const MatScalar *value = v; 812 MatScalar *ap,*aa = a->a,*bap; 813 814 PetscFunctionBegin; 815 if (roworiented) { 816 stepval = (n-1)*bs; 817 } else { 818 stepval = (m-1)*bs; 819 } 820 for (k=0; k<m; k++) { /* loop over added rows */ 821 row = im[k]; 822 if (row < 0) continue; 823 #if defined(PETSC_USE_BOPT_g) 824 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1); 825 #endif 826 rp = aj + ai[row]; 827 ap = aa + bs2*ai[row]; 828 rmax = imax[row]; 829 nrow = ailen[row]; 830 low = 0; 831 for (l=0; l<n; l++) { /* loop over added columns */ 832 if (in[l] < 0) continue; 833 #if defined(PETSC_USE_BOPT_g) 834 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],a->nbs-1); 835 #endif 836 col = in[l]; 837 if (roworiented) { 838 value = v + k*(stepval+bs)*bs + l*bs; 839 } else { 840 value = v + l*(stepval+bs)*bs + k*bs; 841 } 842 if (!sorted) low = 0; high = nrow; 843 while (high-low > 7) { 844 t = (low+high)/2; 845 if (rp[t] > col) high = t; 846 else low = t; 847 } 848 for (i=low; i<high; i++) { 849 if (rp[i] > col) break; 850 if (rp[i] == col) { 851 bap = ap + bs2*i; 852 if (roworiented) { 853 if (is == ADD_VALUES) { 854 for (ii=0; ii<bs; ii++,value+=stepval) { 855 for (jj=ii; jj<bs2; jj+=bs) { 856 bap[jj] += *value++; 857 } 858 } 859 } else { 860 for (ii=0; ii<bs; ii++,value+=stepval) { 861 for (jj=ii; jj<bs2; jj+=bs) { 862 bap[jj] = *value++; 863 } 864 } 865 } 866 } else { 867 if (is == ADD_VALUES) { 868 for (ii=0; ii<bs; ii++,value+=stepval) { 869 for (jj=0; jj<bs; jj++) { 870 *bap++ += *value++; 871 } 872 } 873 } else { 874 for (ii=0; ii<bs; ii++,value+=stepval) { 875 for (jj=0; jj<bs; jj++) { 876 *bap++ = *value++; 877 } 878 } 879 } 880 } 881 goto noinsert2; 882 } 883 } 884 if (nonew == 1) goto noinsert2; 885 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 886 if (nrow >= rmax) { 887 /* there is no extra room in row, therefore enlarge */ 888 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 889 MatScalar *new_a; 890 891 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 892 893 /* malloc new storage space */ 894 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 895 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 896 new_j = (int*)(new_a + bs2*new_nz); 897 new_i = new_j + new_nz; 898 899 /* copy over old data into new slots */ 900 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 901 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 902 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 903 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 904 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 905 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 906 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 907 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 908 /* free up old matrix storage */ 909 ierr = PetscFree(a->a);CHKERRQ(ierr); 910 if (!a->singlemalloc) { 911 ierr = PetscFree(a->i);CHKERRQ(ierr); 912 ierr = PetscFree(a->j);CHKERRQ(ierr); 913 } 914 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 915 a->singlemalloc = PETSC_TRUE; 916 917 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 918 rmax = imax[row] = imax[row] + CHUNKSIZE; 919 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 920 a->maxnz += bs2*CHUNKSIZE; 921 a->reallocs++; 922 a->nz++; 923 } 924 N = nrow++ - 1; 925 /* shift up all the later entries in this row */ 926 for (ii=N; ii>=i; ii--) { 927 rp[ii+1] = rp[ii]; 928 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 929 } 930 if (N >= i) { 931 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 932 } 933 rp[i] = col; 934 bap = ap + bs2*i; 935 if (roworiented) { 936 for (ii=0; ii<bs; ii++,value+=stepval) { 937 for (jj=ii; jj<bs2; jj+=bs) { 938 bap[jj] = *value++; 939 } 940 } 941 } else { 942 for (ii=0; ii<bs; ii++,value+=stepval) { 943 for (jj=0; jj<bs; jj++) { 944 *bap++ = *value++; 945 } 946 } 947 } 948 noinsert2:; 949 low = i; 950 } 951 ailen[row] = nrow; 952 } 953 PetscFunctionReturn(0); 954 } 955 956 #undef __FUNCT__ 957 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 958 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 959 { 960 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 961 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 962 int m = A->m,*ip,N,*ailen = a->ilen; 963 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 964 MatScalar *aa = a->a,*ap; 965 966 PetscFunctionBegin; 967 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 968 969 if (m) rmax = ailen[0]; 970 for (i=1; i<mbs; i++) { 971 /* move each row back by the amount of empty slots (fshift) before it*/ 972 fshift += imax[i-1] - ailen[i-1]; 973 rmax = PetscMax(rmax,ailen[i]); 974 if (fshift) { 975 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 976 N = ailen[i]; 977 for (j=0; j<N; j++) { 978 ip[j-fshift] = ip[j]; 979 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 980 } 981 } 982 ai[i] = ai[i-1] + ailen[i-1]; 983 } 984 if (mbs) { 985 fshift += imax[mbs-1] - ailen[mbs-1]; 986 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 987 } 988 /* reset ilen and imax for each row */ 989 for (i=0; i<mbs; i++) { 990 ailen[i] = imax[i] = ai[i+1] - ai[i]; 991 } 992 a->nz = ai[mbs]; 993 994 /* diagonals may have moved, so kill the diagonal pointers */ 995 if (fshift && a->diag) { 996 ierr = PetscFree(a->diag);CHKERRQ(ierr); 997 PetscLogObjectMemory(A,-(mbs+1)*sizeof(int)); 998 a->diag = 0; 999 } 1000 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2); 1001 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs); 1002 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 1003 a->reallocs = 0; 1004 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1005 1006 PetscFunctionReturn(0); 1007 } 1008 1009 1010 1011 /* 1012 This function returns an array of flags which indicate the locations of contiguous 1013 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1014 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1015 Assume: sizes should be long enough to hold all the values. 1016 */ 1017 #undef __FUNCT__ 1018 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1019 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 1020 { 1021 int i,j,k,row; 1022 PetscTruth flg; 1023 1024 PetscFunctionBegin; 1025 for (i=0,j=0; i<n; j++) { 1026 row = idx[i]; 1027 if (row%bs!=0) { /* Not the begining of a block */ 1028 sizes[j] = 1; 1029 i++; 1030 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1031 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1032 i++; 1033 } else { /* Begining of the block, so check if the complete block exists */ 1034 flg = PETSC_TRUE; 1035 for (k=1; k<bs; k++) { 1036 if (row+k != idx[i+k]) { /* break in the block */ 1037 flg = PETSC_FALSE; 1038 break; 1039 } 1040 } 1041 if (flg == PETSC_TRUE) { /* No break in the bs */ 1042 sizes[j] = bs; 1043 i+= bs; 1044 } else { 1045 sizes[j] = 1; 1046 i++; 1047 } 1048 } 1049 } 1050 *bs_max = j; 1051 PetscFunctionReturn(0); 1052 } 1053 1054 #undef __FUNCT__ 1055 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1056 int MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag) 1057 { 1058 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1059 int ierr,i,j,k,count,is_n,*is_idx,*rows; 1060 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 1061 PetscScalar zero = 0.0; 1062 MatScalar *aa; 1063 1064 PetscFunctionBegin; 1065 /* Make a copy of the IS and sort it */ 1066 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 1067 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1068 1069 /* allocate memory for rows,sizes */ 1070 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 1071 sizes = rows + is_n; 1072 1073 /* copy IS values to rows, and sort them */ 1074 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1075 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1076 if (baij->keepzeroedrows) { 1077 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1078 bs_max = is_n; 1079 } else { 1080 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1081 } 1082 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 1083 1084 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1085 row = rows[j]; 1086 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 1087 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1088 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1089 if (sizes[i] == bs && !baij->keepzeroedrows) { 1090 if (diag) { 1091 if (baij->ilen[row/bs] > 0) { 1092 baij->ilen[row/bs] = 1; 1093 baij->j[baij->i[row/bs]] = row/bs; 1094 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1095 } 1096 /* Now insert all the diagonal values for this bs */ 1097 for (k=0; k<bs; k++) { 1098 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 1099 } 1100 } else { /* (!diag) */ 1101 baij->ilen[row/bs] = 0; 1102 } /* end (!diag) */ 1103 } else { /* (sizes[i] != bs) */ 1104 #if defined (PETSC_USE_DEBUG) 1105 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 1106 #endif 1107 for (k=0; k<count; k++) { 1108 aa[0] = zero; 1109 aa += bs; 1110 } 1111 if (diag) { 1112 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1113 } 1114 } 1115 } 1116 1117 ierr = PetscFree(rows);CHKERRQ(ierr); 1118 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1119 PetscFunctionReturn(0); 1120 } 1121 1122 #undef __FUNCT__ 1123 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1124 int MatSetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 1125 { 1126 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1127 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1128 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1129 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1130 int ridx,cidx,bs2=a->bs2,ierr; 1131 PetscTruth roworiented=a->roworiented; 1132 MatScalar *ap,value,*aa=a->a,*bap; 1133 1134 PetscFunctionBegin; 1135 for (k=0; k<m; k++) { /* loop over added rows */ 1136 row = im[k]; brow = row/bs; 1137 if (row < 0) continue; 1138 #if defined(PETSC_USE_BOPT_g) 1139 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1); 1140 #endif 1141 rp = aj + ai[brow]; 1142 ap = aa + bs2*ai[brow]; 1143 rmax = imax[brow]; 1144 nrow = ailen[brow]; 1145 low = 0; 1146 for (l=0; l<n; l++) { /* loop over added columns */ 1147 if (in[l] < 0) continue; 1148 #if defined(PETSC_USE_BOPT_g) 1149 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1); 1150 #endif 1151 col = in[l]; bcol = col/bs; 1152 ridx = row % bs; cidx = col % bs; 1153 if (roworiented) { 1154 value = v[l + k*n]; 1155 } else { 1156 value = v[k + l*m]; 1157 } 1158 if (!sorted) low = 0; high = nrow; 1159 while (high-low > 7) { 1160 t = (low+high)/2; 1161 if (rp[t] > bcol) high = t; 1162 else low = t; 1163 } 1164 for (i=low; i<high; i++) { 1165 if (rp[i] > bcol) break; 1166 if (rp[i] == bcol) { 1167 bap = ap + bs2*i + bs*cidx + ridx; 1168 if (is == ADD_VALUES) *bap += value; 1169 else *bap = value; 1170 goto noinsert1; 1171 } 1172 } 1173 if (nonew == 1) goto noinsert1; 1174 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1175 if (nrow >= rmax) { 1176 /* there is no extra room in row, therefore enlarge */ 1177 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1178 MatScalar *new_a; 1179 1180 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1181 1182 /* Malloc new storage space */ 1183 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1184 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1185 new_j = (int*)(new_a + bs2*new_nz); 1186 new_i = new_j + new_nz; 1187 1188 /* copy over old data into new slots */ 1189 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1190 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1191 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1192 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1193 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1194 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1195 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1196 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1197 /* free up old matrix storage */ 1198 ierr = PetscFree(a->a);CHKERRQ(ierr); 1199 if (!a->singlemalloc) { 1200 ierr = PetscFree(a->i);CHKERRQ(ierr); 1201 ierr = PetscFree(a->j);CHKERRQ(ierr); 1202 } 1203 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1204 a->singlemalloc = PETSC_TRUE; 1205 1206 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1207 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1208 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1209 a->maxnz += bs2*CHUNKSIZE; 1210 a->reallocs++; 1211 a->nz++; 1212 } 1213 N = nrow++ - 1; 1214 /* shift up all the later entries in this row */ 1215 for (ii=N; ii>=i; ii--) { 1216 rp[ii+1] = rp[ii]; 1217 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1218 } 1219 if (N>=i) { 1220 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1221 } 1222 rp[i] = bcol; 1223 ap[bs2*i + bs*cidx + ridx] = value; 1224 noinsert1:; 1225 low = i; 1226 } 1227 ailen[brow] = nrow; 1228 } 1229 PetscFunctionReturn(0); 1230 } 1231 1232 1233 #undef __FUNCT__ 1234 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1235 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1236 { 1237 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1238 Mat outA; 1239 int ierr; 1240 PetscTruth row_identity,col_identity; 1241 1242 PetscFunctionBegin; 1243 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1244 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1245 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1246 if (!row_identity || !col_identity) { 1247 SETERRQ(1,"Row and column permutations must be identity for in-place ILU"); 1248 } 1249 1250 outA = inA; 1251 inA->factor = FACTOR_LU; 1252 1253 if (!a->diag) { 1254 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1255 } 1256 1257 a->row = row; 1258 a->col = col; 1259 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1260 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1261 1262 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1263 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1264 PetscLogObjectParent(inA,a->icol); 1265 1266 /* 1267 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1268 for ILU(0) factorization with natural ordering 1269 */ 1270 if (a->bs < 8) { 1271 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1272 } else { 1273 if (!a->solve_work) { 1274 ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1275 PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar)); 1276 } 1277 } 1278 1279 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1280 1281 PetscFunctionReturn(0); 1282 } 1283 #undef __FUNCT__ 1284 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1285 int MatPrintHelp_SeqBAIJ(Mat A) 1286 { 1287 static PetscTruth called = PETSC_FALSE; 1288 MPI_Comm comm = A->comm; 1289 int ierr; 1290 1291 PetscFunctionBegin; 1292 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1293 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1294 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1295 PetscFunctionReturn(0); 1296 } 1297 1298 EXTERN_C_BEGIN 1299 #undef __FUNCT__ 1300 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1301 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1302 { 1303 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1304 int i,nz,nbs; 1305 1306 PetscFunctionBegin; 1307 nz = baij->maxnz/baij->bs2; 1308 nbs = baij->nbs; 1309 for (i=0; i<nz; i++) { 1310 baij->j[i] = indices[i]; 1311 } 1312 baij->nz = nz; 1313 for (i=0; i<nbs; i++) { 1314 baij->ilen[i] = baij->imax[i]; 1315 } 1316 1317 PetscFunctionReturn(0); 1318 } 1319 EXTERN_C_END 1320 1321 #undef __FUNCT__ 1322 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1323 /*@ 1324 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1325 in the matrix. 1326 1327 Input Parameters: 1328 + mat - the SeqBAIJ matrix 1329 - indices - the column indices 1330 1331 Level: advanced 1332 1333 Notes: 1334 This can be called if you have precomputed the nonzero structure of the 1335 matrix and want to provide it to the matrix object to improve the performance 1336 of the MatSetValues() operation. 1337 1338 You MUST have set the correct numbers of nonzeros per row in the call to 1339 MatCreateSeqBAIJ(). 1340 1341 MUST be called before any calls to MatSetValues(); 1342 1343 @*/ 1344 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1345 { 1346 int ierr,(*f)(Mat,int *); 1347 1348 PetscFunctionBegin; 1349 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1350 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1351 if (f) { 1352 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1353 } else { 1354 SETERRQ(1,"Wrong type of matrix to set column indices"); 1355 } 1356 PetscFunctionReturn(0); 1357 } 1358 1359 #undef __FUNCT__ 1360 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1361 int MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1362 { 1363 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1364 int ierr,i,j,n,row,bs,*ai,*aj,mbs; 1365 PetscReal atmp; 1366 PetscScalar *x,zero = 0.0; 1367 MatScalar *aa; 1368 int ncols,brow,krow,kcol; 1369 1370 PetscFunctionBegin; 1371 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1372 bs = a->bs; 1373 aa = a->a; 1374 ai = a->i; 1375 aj = a->j; 1376 mbs = a->mbs; 1377 1378 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1379 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1380 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1381 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1382 for (i=0; i<mbs; i++) { 1383 ncols = ai[1] - ai[0]; ai++; 1384 brow = bs*i; 1385 for (j=0; j<ncols; j++){ 1386 /* bcol = bs*(*aj); */ 1387 for (kcol=0; kcol<bs; kcol++){ 1388 for (krow=0; krow<bs; krow++){ 1389 atmp = PetscAbsScalar(*aa); aa++; 1390 row = brow + krow; /* row index */ 1391 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1392 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1393 } 1394 } 1395 aj++; 1396 } 1397 } 1398 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1399 PetscFunctionReturn(0); 1400 } 1401 1402 #undef __FUNCT__ 1403 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1404 int MatSetUpPreallocation_SeqBAIJ(Mat A) 1405 { 1406 int ierr; 1407 1408 PetscFunctionBegin; 1409 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1410 PetscFunctionReturn(0); 1411 } 1412 1413 #undef __FUNCT__ 1414 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1415 int MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1416 { 1417 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1418 PetscFunctionBegin; 1419 *array = a->a; 1420 PetscFunctionReturn(0); 1421 } 1422 1423 #undef __FUNCT__ 1424 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1425 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1426 { 1427 PetscFunctionBegin; 1428 PetscFunctionReturn(0); 1429 } 1430 1431 #include "petscblaslapack.h" 1432 #undef __FUNCT__ 1433 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1434 int MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1435 { 1436 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1437 int ierr,one=1,i,bs=y->bs,j,bs2; 1438 1439 PetscFunctionBegin; 1440 if (str == SAME_NONZERO_PATTERN) { 1441 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1442 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1443 if (y->xtoy && y->XtoY != X) { 1444 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1445 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1446 } 1447 if (!y->xtoy) { /* get xtoy */ 1448 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1449 y->XtoY = X; 1450 } 1451 bs2 = bs*bs; 1452 for (i=0; i<x->nz; i++) { 1453 j = 0; 1454 while (j < bs2){ 1455 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1456 j++; 1457 } 1458 } 1459 PetscLogInfo(0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)); 1460 } else { 1461 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1462 } 1463 PetscFunctionReturn(0); 1464 } 1465 1466 /* -------------------------------------------------------------------*/ 1467 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1468 MatGetRow_SeqBAIJ, 1469 MatRestoreRow_SeqBAIJ, 1470 MatMult_SeqBAIJ_N, 1471 MatMultAdd_SeqBAIJ_N, 1472 MatMultTranspose_SeqBAIJ, 1473 MatMultTransposeAdd_SeqBAIJ, 1474 MatSolve_SeqBAIJ_N, 1475 0, 1476 0, 1477 0, 1478 MatLUFactor_SeqBAIJ, 1479 0, 1480 0, 1481 MatTranspose_SeqBAIJ, 1482 MatGetInfo_SeqBAIJ, 1483 MatEqual_SeqBAIJ, 1484 MatGetDiagonal_SeqBAIJ, 1485 MatDiagonalScale_SeqBAIJ, 1486 MatNorm_SeqBAIJ, 1487 0, 1488 MatAssemblyEnd_SeqBAIJ, 1489 0, 1490 MatSetOption_SeqBAIJ, 1491 MatZeroEntries_SeqBAIJ, 1492 MatZeroRows_SeqBAIJ, 1493 MatLUFactorSymbolic_SeqBAIJ, 1494 MatLUFactorNumeric_SeqBAIJ_N, 1495 0, 1496 0, 1497 MatSetUpPreallocation_SeqBAIJ, 1498 MatILUFactorSymbolic_SeqBAIJ, 1499 0, 1500 MatGetArray_SeqBAIJ, 1501 MatRestoreArray_SeqBAIJ, 1502 MatDuplicate_SeqBAIJ, 1503 0, 1504 0, 1505 MatILUFactor_SeqBAIJ, 1506 0, 1507 MatAXPY_SeqBAIJ, 1508 MatGetSubMatrices_SeqBAIJ, 1509 MatIncreaseOverlap_SeqBAIJ, 1510 MatGetValues_SeqBAIJ, 1511 0, 1512 MatPrintHelp_SeqBAIJ, 1513 MatScale_SeqBAIJ, 1514 0, 1515 0, 1516 0, 1517 MatGetBlockSize_SeqBAIJ, 1518 MatGetRowIJ_SeqBAIJ, 1519 MatRestoreRowIJ_SeqBAIJ, 1520 0, 1521 0, 1522 0, 1523 0, 1524 0, 1525 0, 1526 MatSetValuesBlocked_SeqBAIJ, 1527 MatGetSubMatrix_SeqBAIJ, 1528 MatDestroy_SeqBAIJ, 1529 MatView_SeqBAIJ, 1530 MatGetPetscMaps_Petsc, 1531 0, 1532 0, 1533 0, 1534 0, 1535 0, 1536 0, 1537 MatGetRowMax_SeqBAIJ, 1538 MatConvert_Basic}; 1539 1540 EXTERN_C_BEGIN 1541 #undef __FUNCT__ 1542 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 1543 int MatStoreValues_SeqBAIJ(Mat mat) 1544 { 1545 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1546 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1547 int ierr; 1548 1549 PetscFunctionBegin; 1550 if (aij->nonew != 1) { 1551 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1552 } 1553 1554 /* allocate space for values if not already there */ 1555 if (!aij->saved_values) { 1556 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1557 } 1558 1559 /* copy values over */ 1560 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1561 PetscFunctionReturn(0); 1562 } 1563 EXTERN_C_END 1564 1565 EXTERN_C_BEGIN 1566 #undef __FUNCT__ 1567 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 1568 int MatRetrieveValues_SeqBAIJ(Mat mat) 1569 { 1570 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1571 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1572 1573 PetscFunctionBegin; 1574 if (aij->nonew != 1) { 1575 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1576 } 1577 if (!aij->saved_values) { 1578 SETERRQ(1,"Must call MatStoreValues(A);first"); 1579 } 1580 1581 /* copy values over */ 1582 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1583 PetscFunctionReturn(0); 1584 } 1585 EXTERN_C_END 1586 1587 EXTERN_C_BEGIN 1588 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*); 1589 EXTERN_C_END 1590 1591 EXTERN_C_BEGIN 1592 #undef __FUNCT__ 1593 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 1594 int MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,int bs,int nz,int *nnz) 1595 { 1596 Mat_SeqBAIJ *b; 1597 int i,len,ierr,mbs,nbs,bs2,newbs = bs; 1598 PetscTruth flg; 1599 1600 PetscFunctionBegin; 1601 1602 B->preallocated = PETSC_TRUE; 1603 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 1604 if (nnz && newbs != bs) { 1605 SETERRQ(1,"Cannot change blocksize from command line if setting nnz"); 1606 } 1607 bs = newbs; 1608 1609 mbs = B->m/bs; 1610 nbs = B->n/bs; 1611 bs2 = bs*bs; 1612 1613 if (mbs*bs!=B->m || nbs*bs!=B->n) { 1614 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 1615 } 1616 1617 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1618 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1619 if (nnz) { 1620 for (i=0; i<mbs; i++) { 1621 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1622 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 1623 } 1624 } 1625 1626 b = (Mat_SeqBAIJ*)B->data; 1627 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1628 B->ops->solve = MatSolve_SeqBAIJ_Update; 1629 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 1630 if (!flg) { 1631 switch (bs) { 1632 case 1: 1633 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1634 B->ops->mult = MatMult_SeqBAIJ_1; 1635 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 1636 break; 1637 case 2: 1638 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1639 B->ops->mult = MatMult_SeqBAIJ_2; 1640 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 1641 break; 1642 case 3: 1643 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1644 B->ops->mult = MatMult_SeqBAIJ_3; 1645 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 1646 break; 1647 case 4: 1648 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1649 B->ops->mult = MatMult_SeqBAIJ_4; 1650 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 1651 break; 1652 case 5: 1653 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1654 B->ops->mult = MatMult_SeqBAIJ_5; 1655 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 1656 break; 1657 case 6: 1658 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 1659 B->ops->mult = MatMult_SeqBAIJ_6; 1660 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 1661 break; 1662 case 7: 1663 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 1664 B->ops->mult = MatMult_SeqBAIJ_7; 1665 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 1666 break; 1667 default: 1668 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 1669 B->ops->mult = MatMult_SeqBAIJ_N; 1670 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 1671 break; 1672 } 1673 } 1674 b->bs = bs; 1675 b->mbs = mbs; 1676 b->nbs = nbs; 1677 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1678 if (!nnz) { 1679 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1680 else if (nz <= 0) nz = 1; 1681 for (i=0; i<mbs; i++) b->imax[i] = nz; 1682 nz = nz*mbs; 1683 } else { 1684 nz = 0; 1685 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1686 } 1687 1688 /* allocate the matrix space */ 1689 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1690 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1691 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1692 b->j = (int*)(b->a + nz*bs2); 1693 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 1694 b->i = b->j + nz; 1695 b->singlemalloc = PETSC_TRUE; 1696 1697 b->i[0] = 0; 1698 for (i=1; i<mbs+1; i++) { 1699 b->i[i] = b->i[i-1] + b->imax[i-1]; 1700 } 1701 1702 /* b->ilen will count nonzeros in each block row so far. */ 1703 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1704 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1705 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1706 1707 b->bs = bs; 1708 b->bs2 = bs2; 1709 b->mbs = mbs; 1710 b->nz = 0; 1711 b->maxnz = nz*bs2; 1712 B->info.nz_unneeded = (PetscReal)b->maxnz; 1713 PetscFunctionReturn(0); 1714 } 1715 EXTERN_C_END 1716 1717 EXTERN_C_BEGIN 1718 #undef __FUNCT__ 1719 #define __FUNCT__ "MatCreate_SeqBAIJ" 1720 int MatCreate_SeqBAIJ(Mat B) 1721 { 1722 int ierr,size; 1723 Mat_SeqBAIJ *b; 1724 1725 PetscFunctionBegin; 1726 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1727 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1728 1729 B->m = B->M = PetscMax(B->m,B->M); 1730 B->n = B->N = PetscMax(B->n,B->N); 1731 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 1732 B->data = (void*)b; 1733 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1734 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1735 B->factor = 0; 1736 B->lupivotthreshold = 1.0; 1737 B->mapping = 0; 1738 b->row = 0; 1739 b->col = 0; 1740 b->icol = 0; 1741 b->reallocs = 0; 1742 b->saved_values = 0; 1743 #if defined(PETSC_USE_MAT_SINGLE) 1744 b->setvalueslen = 0; 1745 b->setvaluescopy = PETSC_NULL; 1746 #endif 1747 1748 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 1749 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 1750 1751 b->sorted = PETSC_FALSE; 1752 b->roworiented = PETSC_TRUE; 1753 b->nonew = 0; 1754 b->diag = 0; 1755 b->solve_work = 0; 1756 b->mult_work = 0; 1757 B->spptr = 0; 1758 B->info.nz_unneeded = (PetscReal)b->maxnz; 1759 b->keepzeroedrows = PETSC_FALSE; 1760 b->xtoy = 0; 1761 b->XtoY = 0; 1762 1763 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1764 "MatStoreValues_SeqBAIJ", 1765 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1766 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1767 "MatRetrieveValues_SeqBAIJ", 1768 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1769 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1770 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1771 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1772 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 1773 "MatConvert_SeqBAIJ_SeqAIJ", 1774 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 1775 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 1776 "MatSeqBAIJSetPreallocation_SeqBAIJ", 1777 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 1778 PetscFunctionReturn(0); 1779 } 1780 EXTERN_C_END 1781 1782 #undef __FUNCT__ 1783 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 1784 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1785 { 1786 Mat C; 1787 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 1788 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1789 1790 PetscFunctionBegin; 1791 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1792 1793 *B = 0; 1794 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1795 ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr); 1796 c = (Mat_SeqBAIJ*)C->data; 1797 1798 c->bs = a->bs; 1799 c->bs2 = a->bs2; 1800 c->mbs = a->mbs; 1801 c->nbs = a->nbs; 1802 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1803 1804 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1805 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1806 for (i=0; i<mbs; i++) { 1807 c->imax[i] = a->imax[i]; 1808 c->ilen[i] = a->ilen[i]; 1809 } 1810 1811 /* allocate the matrix space */ 1812 c->singlemalloc = PETSC_TRUE; 1813 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1814 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1815 c->j = (int*)(c->a + nz*bs2); 1816 c->i = c->j + nz; 1817 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1818 if (mbs > 0) { 1819 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1820 if (cpvalues == MAT_COPY_VALUES) { 1821 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1822 } else { 1823 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1824 } 1825 } 1826 1827 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1828 c->sorted = a->sorted; 1829 c->roworiented = a->roworiented; 1830 c->nonew = a->nonew; 1831 1832 if (a->diag) { 1833 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1834 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1835 for (i=0; i<mbs; i++) { 1836 c->diag[i] = a->diag[i]; 1837 } 1838 } else c->diag = 0; 1839 c->nz = a->nz; 1840 c->maxnz = a->maxnz; 1841 c->solve_work = 0; 1842 C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1843 c->mult_work = 0; 1844 C->preallocated = PETSC_TRUE; 1845 C->assembled = PETSC_TRUE; 1846 *B = C; 1847 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1848 PetscFunctionReturn(0); 1849 } 1850 1851 EXTERN_C_BEGIN 1852 #undef __FUNCT__ 1853 #define __FUNCT__ "MatLoad_SeqBAIJ" 1854 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A) 1855 { 1856 Mat_SeqBAIJ *a; 1857 Mat B; 1858 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1859 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1860 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1861 int *masked,nmask,tmp,bs2,ishift; 1862 PetscScalar *aa; 1863 MPI_Comm comm = ((PetscObject)viewer)->comm; 1864 1865 PetscFunctionBegin; 1866 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1867 bs2 = bs*bs; 1868 1869 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1870 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1871 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1872 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1873 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1874 M = header[1]; N = header[2]; nz = header[3]; 1875 1876 if (header[3] < 0) { 1877 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 1878 } 1879 1880 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1881 1882 /* 1883 This code adds extra rows to make sure the number of rows is 1884 divisible by the blocksize 1885 */ 1886 mbs = M/bs; 1887 extra_rows = bs - M + bs*(mbs); 1888 if (extra_rows == bs) extra_rows = 0; 1889 else mbs++; 1890 if (extra_rows) { 1891 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1892 } 1893 1894 /* read in row lengths */ 1895 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1896 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1897 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1898 1899 /* read in column indices */ 1900 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1901 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1902 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1903 1904 /* loop over row lengths determining block row lengths */ 1905 ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 1906 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1907 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1908 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1909 masked = mask + mbs; 1910 rowcount = 0; nzcount = 0; 1911 for (i=0; i<mbs; i++) { 1912 nmask = 0; 1913 for (j=0; j<bs; j++) { 1914 kmax = rowlengths[rowcount]; 1915 for (k=0; k<kmax; k++) { 1916 tmp = jj[nzcount++]/bs; 1917 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1918 } 1919 rowcount++; 1920 } 1921 browlengths[i] += nmask; 1922 /* zero out the mask elements we set */ 1923 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1924 } 1925 1926 /* create our matrix */ 1927 ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B); 1928 ierr = MatSetType(B,type);CHKERRQ(ierr); 1929 ierr = MatSeqBAIJSetPreallocation(B,bs,0,browlengths);CHKERRQ(ierr); 1930 a = (Mat_SeqBAIJ*)B->data; 1931 1932 /* set matrix "i" values */ 1933 a->i[0] = 0; 1934 for (i=1; i<= mbs; i++) { 1935 a->i[i] = a->i[i-1] + browlengths[i-1]; 1936 a->ilen[i-1] = browlengths[i-1]; 1937 } 1938 a->nz = 0; 1939 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 1940 1941 /* read in nonzero values */ 1942 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1943 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1944 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1945 1946 /* set "a" and "j" values into matrix */ 1947 nzcount = 0; jcount = 0; 1948 for (i=0; i<mbs; i++) { 1949 nzcountb = nzcount; 1950 nmask = 0; 1951 for (j=0; j<bs; j++) { 1952 kmax = rowlengths[i*bs+j]; 1953 for (k=0; k<kmax; k++) { 1954 tmp = jj[nzcount++]/bs; 1955 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1956 } 1957 } 1958 /* sort the masked values */ 1959 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1960 1961 /* set "j" values into matrix */ 1962 maskcount = 1; 1963 for (j=0; j<nmask; j++) { 1964 a->j[jcount++] = masked[j]; 1965 mask[masked[j]] = maskcount++; 1966 } 1967 /* set "a" values into matrix */ 1968 ishift = bs2*a->i[i]; 1969 for (j=0; j<bs; j++) { 1970 kmax = rowlengths[i*bs+j]; 1971 for (k=0; k<kmax; k++) { 1972 tmp = jj[nzcountb]/bs ; 1973 block = mask[tmp] - 1; 1974 point = jj[nzcountb] - bs*tmp; 1975 idx = ishift + bs2*block + j + bs*point; 1976 a->a[idx] = (MatScalar)aa[nzcountb++]; 1977 } 1978 } 1979 /* zero out the mask elements we set */ 1980 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1981 } 1982 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1983 1984 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1985 ierr = PetscFree(browlengths);CHKERRQ(ierr); 1986 ierr = PetscFree(aa);CHKERRQ(ierr); 1987 ierr = PetscFree(jj);CHKERRQ(ierr); 1988 ierr = PetscFree(mask);CHKERRQ(ierr); 1989 1990 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1991 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1992 ierr = MatView_Private(B);CHKERRQ(ierr); 1993 1994 *A = B; 1995 PetscFunctionReturn(0); 1996 } 1997 EXTERN_C_END 1998 1999 #undef __FUNCT__ 2000 #define __FUNCT__ "MatCreateSeqBAIJ" 2001 /*@C 2002 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2003 compressed row) format. For good matrix assembly performance the 2004 user should preallocate the matrix storage by setting the parameter nz 2005 (or the array nnz). By setting these parameters accurately, performance 2006 during matrix assembly can be increased by more than a factor of 50. 2007 2008 Collective on MPI_Comm 2009 2010 Input Parameters: 2011 + comm - MPI communicator, set to PETSC_COMM_SELF 2012 . bs - size of block 2013 . m - number of rows 2014 . n - number of columns 2015 . nz - number of nonzero blocks per block row (same for all rows) 2016 - nnz - array containing the number of nonzero blocks in the various block rows 2017 (possibly different for each block row) or PETSC_NULL 2018 2019 Output Parameter: 2020 . A - the matrix 2021 2022 Options Database Keys: 2023 . -mat_no_unroll - uses code that does not unroll the loops in the 2024 block calculations (much slower) 2025 . -mat_block_size - size of the blocks to use 2026 2027 Level: intermediate 2028 2029 Notes: 2030 A nonzero block is any block that as 1 or more nonzeros in it 2031 2032 The block AIJ format is fully compatible with standard Fortran 77 2033 storage. That is, the stored row and column indices can begin at 2034 either one (as in Fortran) or zero. See the users' manual for details. 2035 2036 Specify the preallocated storage with either nz or nnz (not both). 2037 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2038 allocation. For additional details, see the users manual chapter on 2039 matrices. 2040 2041 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2042 @*/ 2043 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A) 2044 { 2045 int ierr; 2046 2047 PetscFunctionBegin; 2048 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 2049 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2050 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 2051 PetscFunctionReturn(0); 2052 } 2053 2054 #undef __FUNCT__ 2055 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2056 /*@C 2057 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2058 per row in the matrix. For good matrix assembly performance the 2059 user should preallocate the matrix storage by setting the parameter nz 2060 (or the array nnz). By setting these parameters accurately, performance 2061 during matrix assembly can be increased by more than a factor of 50. 2062 2063 Collective on MPI_Comm 2064 2065 Input Parameters: 2066 + A - the matrix 2067 . bs - size of block 2068 . nz - number of block nonzeros per block row (same for all rows) 2069 - nnz - array containing the number of block nonzeros in the various block rows 2070 (possibly different for each block row) or PETSC_NULL 2071 2072 Options Database Keys: 2073 . -mat_no_unroll - uses code that does not unroll the loops in the 2074 block calculations (much slower) 2075 . -mat_block_size - size of the blocks to use 2076 2077 Level: intermediate 2078 2079 Notes: 2080 The block AIJ format is fully compatible with standard Fortran 77 2081 storage. That is, the stored row and column indices can begin at 2082 either one (as in Fortran) or zero. See the users' manual for details. 2083 2084 Specify the preallocated storage with either nz or nnz (not both). 2085 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2086 allocation. For additional details, see the users manual chapter on 2087 matrices. 2088 2089 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2090 @*/ 2091 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) 2092 { 2093 int ierr,(*f)(Mat,int,int,const int[]); 2094 2095 PetscFunctionBegin; 2096 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2097 if (f) { 2098 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2099 } 2100 PetscFunctionReturn(0); 2101 } 2102