1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 #include "petscsys.h" /*I "petscmat.h" I*/ 11 12 /* 13 Special version for Fun3d sequential benchmark 14 */ 15 #if defined(PETSC_HAVE_FORTRAN_CAPS) 16 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 17 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 18 #define matsetvaluesblocked4_ matsetvaluesblocked4 19 #endif 20 21 EXTERN_C_BEGIN 22 #undef __FUNCT__ 23 #define __FUNCT__ "matsetvaluesblocked4_" 24 void matsetvaluesblocked4_(Mat *AA,int *mm,const int im[],int *nn,const int in[],const PetscScalar v[]) 25 { 26 Mat A = *AA; 27 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 28 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 29 int *ai=a->i,*ailen=a->ilen; 30 int *aj=a->j,stepval; 31 const PetscScalar *value = v; 32 MatScalar *ap,*aa = a->a,*bap; 33 34 PetscFunctionBegin; 35 stepval = (n-1)*4; 36 for (k=0; k<m; k++) { /* loop over added rows */ 37 row = im[k]; 38 rp = aj + ai[row]; 39 ap = aa + 16*ai[row]; 40 nrow = ailen[row]; 41 low = 0; 42 for (l=0; l<n; l++) { /* loop over added columns */ 43 col = in[l]; 44 value = v + k*(stepval+4)*4 + l*4; 45 low = 0; high = nrow; 46 while (high-low > 7) { 47 t = (low+high)/2; 48 if (rp[t] > col) high = t; 49 else low = t; 50 } 51 for (i=low; i<high; i++) { 52 if (rp[i] > col) break; 53 if (rp[i] == col) { 54 bap = ap + 16*i; 55 for (ii=0; ii<4; ii++,value+=stepval) { 56 for (jj=ii; jj<16; jj+=4) { 57 bap[jj] += *value++; 58 } 59 } 60 goto noinsert2; 61 } 62 } 63 N = nrow++ - 1; 64 /* shift up all the later entries in this row */ 65 for (ii=N; ii>=i; ii--) { 66 rp[ii+1] = rp[ii]; 67 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 68 } 69 if (N >= i) { 70 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 71 } 72 rp[i] = col; 73 bap = ap + 16*i; 74 for (ii=0; ii<4; ii++,value+=stepval) { 75 for (jj=ii; jj<16; jj+=4) { 76 bap[jj] = *value++; 77 } 78 } 79 noinsert2:; 80 low = i; 81 } 82 ailen[row] = nrow; 83 } 84 } 85 EXTERN_C_END 86 87 #if defined(PETSC_HAVE_FORTRAN_CAPS) 88 #define matsetvalues4_ MATSETVALUES4 89 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 90 #define matsetvalues4_ matsetvalues4 91 #endif 92 93 EXTERN_C_BEGIN 94 #undef __FUNCT__ 95 #define __FUNCT__ "MatSetValues4_" 96 void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v) 97 { 98 Mat A = *AA; 99 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 100 int *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 101 int *ai=a->i,*ailen=a->ilen; 102 int *aj=a->j,brow,bcol; 103 int ridx,cidx; 104 MatScalar *ap,value,*aa=a->a,*bap; 105 106 PetscFunctionBegin; 107 for (k=0; k<m; k++) { /* loop over added rows */ 108 row = im[k]; brow = row/4; 109 rp = aj + ai[brow]; 110 ap = aa + 16*ai[brow]; 111 nrow = ailen[brow]; 112 low = 0; 113 for (l=0; l<n; l++) { /* loop over added columns */ 114 col = in[l]; bcol = col/4; 115 ridx = row % 4; cidx = col % 4; 116 value = v[l + k*n]; 117 low = 0; high = nrow; 118 while (high-low > 7) { 119 t = (low+high)/2; 120 if (rp[t] > bcol) high = t; 121 else low = t; 122 } 123 for (i=low; i<high; i++) { 124 if (rp[i] > bcol) break; 125 if (rp[i] == bcol) { 126 bap = ap + 16*i + 4*cidx + ridx; 127 *bap += value; 128 goto noinsert1; 129 } 130 } 131 N = nrow++ - 1; 132 /* shift up all the later entries in this row */ 133 for (ii=N; ii>=i; ii--) { 134 rp[ii+1] = rp[ii]; 135 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 136 } 137 if (N>=i) { 138 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 139 } 140 rp[i] = bcol; 141 ap[16*i + 4*cidx + ridx] = value; 142 noinsert1:; 143 low = i; 144 } 145 ailen[brow] = nrow; 146 } 147 } 148 EXTERN_C_END 149 150 /* UGLY, ugly, ugly 151 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 152 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 153 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 154 converts the entries into single precision and then calls ..._MatScalar() to put them 155 into the single precision data structures. 156 */ 157 #if defined(PETSC_USE_MAT_SINGLE) 158 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 159 #else 160 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 161 #endif 162 163 #define CHUNKSIZE 10 164 165 /* 166 Checks for missing diagonals 167 */ 168 #undef __FUNCT__ 169 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 170 int MatMissingDiagonal_SeqBAIJ(Mat A) 171 { 172 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 173 int *diag,*jj = a->j,i,ierr; 174 175 PetscFunctionBegin; 176 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 177 diag = a->diag; 178 for (i=0; i<a->mbs; i++) { 179 if (jj[diag[i]] != i) { 180 SETERRQ1(1,"Matrix is missing diagonal number %d",i); 181 } 182 } 183 PetscFunctionReturn(0); 184 } 185 186 #undef __FUNCT__ 187 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 188 int MatMarkDiagonal_SeqBAIJ(Mat A) 189 { 190 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 191 int i,j,*diag,m = a->mbs,ierr; 192 193 PetscFunctionBegin; 194 if (a->diag) PetscFunctionReturn(0); 195 196 ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 197 PetscLogObjectMemory(A,(m+1)*sizeof(int)); 198 for (i=0; i<m; i++) { 199 diag[i] = a->i[i+1]; 200 for (j=a->i[i]; j<a->i[i+1]; j++) { 201 if (a->j[j] == i) { 202 diag[i] = j; 203 break; 204 } 205 } 206 } 207 a->diag = diag; 208 PetscFunctionReturn(0); 209 } 210 211 212 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 213 214 #undef __FUNCT__ 215 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 216 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 217 { 218 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 219 int ierr,n = a->mbs,i; 220 221 PetscFunctionBegin; 222 *nn = n; 223 if (!ia) PetscFunctionReturn(0); 224 if (symmetric) { 225 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 226 } else if (oshift == 1) { 227 /* temporarily add 1 to i and j indices */ 228 int nz = a->i[n]; 229 for (i=0; i<nz; i++) a->j[i]++; 230 for (i=0; i<n+1; i++) a->i[i]++; 231 *ia = a->i; *ja = a->j; 232 } else { 233 *ia = a->i; *ja = a->j; 234 } 235 236 PetscFunctionReturn(0); 237 } 238 239 #undef __FUNCT__ 240 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 241 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 242 { 243 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 244 int i,n = a->mbs,ierr; 245 246 PetscFunctionBegin; 247 if (!ia) PetscFunctionReturn(0); 248 if (symmetric) { 249 ierr = PetscFree(*ia);CHKERRQ(ierr); 250 ierr = PetscFree(*ja);CHKERRQ(ierr); 251 } else if (oshift == 1) { 252 int nz = a->i[n]-1; 253 for (i=0; i<nz; i++) a->j[i]--; 254 for (i=0; i<n+1; i++) a->i[i]--; 255 } 256 PetscFunctionReturn(0); 257 } 258 259 #undef __FUNCT__ 260 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 261 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 262 { 263 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 264 265 PetscFunctionBegin; 266 *bs = baij->bs; 267 PetscFunctionReturn(0); 268 } 269 270 271 #undef __FUNCT__ 272 #define __FUNCT__ "MatDestroy_SeqBAIJ" 273 int MatDestroy_SeqBAIJ(Mat A) 274 { 275 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 276 int ierr; 277 278 PetscFunctionBegin; 279 #if defined(PETSC_USE_LOG) 280 PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 281 #endif 282 ierr = PetscFree(a->a);CHKERRQ(ierr); 283 if (!a->singlemalloc) { 284 ierr = PetscFree(a->i);CHKERRQ(ierr); 285 ierr = PetscFree(a->j);CHKERRQ(ierr); 286 } 287 if (a->row) { 288 ierr = ISDestroy(a->row);CHKERRQ(ierr); 289 } 290 if (a->col) { 291 ierr = ISDestroy(a->col);CHKERRQ(ierr); 292 } 293 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 294 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 295 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 296 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 297 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 298 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 299 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 300 #if defined(PETSC_USE_MAT_SINGLE) 301 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 302 #endif 303 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 304 305 ierr = PetscFree(a);CHKERRQ(ierr); 306 PetscFunctionReturn(0); 307 } 308 309 #undef __FUNCT__ 310 #define __FUNCT__ "MatSetOption_SeqBAIJ" 311 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 312 { 313 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 314 315 PetscFunctionBegin; 316 switch (op) { 317 case MAT_ROW_ORIENTED: 318 a->roworiented = PETSC_TRUE; 319 break; 320 case MAT_COLUMN_ORIENTED: 321 a->roworiented = PETSC_FALSE; 322 break; 323 case MAT_COLUMNS_SORTED: 324 a->sorted = PETSC_TRUE; 325 break; 326 case MAT_COLUMNS_UNSORTED: 327 a->sorted = PETSC_FALSE; 328 break; 329 case MAT_KEEP_ZEROED_ROWS: 330 a->keepzeroedrows = PETSC_TRUE; 331 break; 332 case MAT_NO_NEW_NONZERO_LOCATIONS: 333 a->nonew = 1; 334 break; 335 case MAT_NEW_NONZERO_LOCATION_ERR: 336 a->nonew = -1; 337 break; 338 case MAT_NEW_NONZERO_ALLOCATION_ERR: 339 a->nonew = -2; 340 break; 341 case MAT_YES_NEW_NONZERO_LOCATIONS: 342 a->nonew = 0; 343 break; 344 case MAT_ROWS_SORTED: 345 case MAT_ROWS_UNSORTED: 346 case MAT_YES_NEW_DIAGONALS: 347 case MAT_IGNORE_OFF_PROC_ENTRIES: 348 case MAT_USE_HASH_TABLE: 349 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 350 break; 351 case MAT_NO_NEW_DIAGONALS: 352 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 353 case MAT_SYMMETRIC: 354 case MAT_STRUCTURALLY_SYMMETRIC: 355 case MAT_NOT_SYMMETRIC: 356 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 357 case MAT_HERMITIAN: 358 case MAT_NOT_HERMITIAN: 359 case MAT_SYMMETRY_ETERNAL: 360 case MAT_NOT_SYMMETRY_ETERNAL: 361 break; 362 default: 363 SETERRQ(PETSC_ERR_SUP,"unknown option"); 364 } 365 PetscFunctionReturn(0); 366 } 367 368 #undef __FUNCT__ 369 #define __FUNCT__ "MatGetRow_SeqBAIJ" 370 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 371 { 372 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 373 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 374 MatScalar *aa,*aa_i; 375 PetscScalar *v_i; 376 377 PetscFunctionBegin; 378 bs = a->bs; 379 ai = a->i; 380 aj = a->j; 381 aa = a->a; 382 bs2 = a->bs2; 383 384 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d out of range", row); 385 386 bn = row/bs; /* Block number */ 387 bp = row % bs; /* Block Position */ 388 M = ai[bn+1] - ai[bn]; 389 *nz = bs*M; 390 391 if (v) { 392 *v = 0; 393 if (*nz) { 394 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 395 for (i=0; i<M; i++) { /* for each block in the block row */ 396 v_i = *v + i*bs; 397 aa_i = aa + bs2*(ai[bn] + i); 398 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 399 } 400 } 401 } 402 403 if (idx) { 404 *idx = 0; 405 if (*nz) { 406 ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 407 for (i=0; i<M; i++) { /* for each block in the block row */ 408 idx_i = *idx + i*bs; 409 itmp = bs*aj[ai[bn] + i]; 410 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 411 } 412 } 413 } 414 PetscFunctionReturn(0); 415 } 416 417 #undef __FUNCT__ 418 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 419 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 420 { 421 int ierr; 422 423 PetscFunctionBegin; 424 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 425 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 426 PetscFunctionReturn(0); 427 } 428 429 #undef __FUNCT__ 430 #define __FUNCT__ "MatTranspose_SeqBAIJ" 431 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 432 { 433 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 434 Mat C; 435 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 436 int *rows,*cols,bs2=a->bs2; 437 PetscScalar *array; 438 439 PetscFunctionBegin; 440 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 441 ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 442 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 443 444 #if defined(PETSC_USE_MAT_SINGLE) 445 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 446 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 447 #else 448 array = a->a; 449 #endif 450 451 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 452 ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 453 ierr = PetscFree(col);CHKERRQ(ierr); 454 ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 455 cols = rows + bs; 456 for (i=0; i<mbs; i++) { 457 cols[0] = i*bs; 458 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 459 len = ai[i+1] - ai[i]; 460 for (j=0; j<len; j++) { 461 rows[0] = (*aj++)*bs; 462 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 463 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 464 array += bs2; 465 } 466 } 467 ierr = PetscFree(rows);CHKERRQ(ierr); 468 #if defined(PETSC_USE_MAT_SINGLE) 469 ierr = PetscFree(array); 470 #endif 471 472 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 473 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 474 475 if (B) { 476 *B = C; 477 } else { 478 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 479 } 480 PetscFunctionReturn(0); 481 } 482 483 #undef __FUNCT__ 484 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 485 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 486 { 487 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 488 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 489 PetscScalar *aa; 490 FILE *file; 491 492 PetscFunctionBegin; 493 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 494 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 495 col_lens[0] = MAT_FILE_COOKIE; 496 497 col_lens[1] = A->m; 498 col_lens[2] = A->n; 499 col_lens[3] = a->nz*bs2; 500 501 /* store lengths of each row and write (including header) to file */ 502 count = 0; 503 for (i=0; i<a->mbs; i++) { 504 for (j=0; j<bs; j++) { 505 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 506 } 507 } 508 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 509 ierr = PetscFree(col_lens);CHKERRQ(ierr); 510 511 /* store column indices (zero start index) */ 512 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 513 count = 0; 514 for (i=0; i<a->mbs; i++) { 515 for (j=0; j<bs; j++) { 516 for (k=a->i[i]; k<a->i[i+1]; k++) { 517 for (l=0; l<bs; l++) { 518 jj[count++] = bs*a->j[k] + l; 519 } 520 } 521 } 522 } 523 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 524 ierr = PetscFree(jj);CHKERRQ(ierr); 525 526 /* store nonzero values */ 527 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 528 count = 0; 529 for (i=0; i<a->mbs; i++) { 530 for (j=0; j<bs; j++) { 531 for (k=a->i[i]; k<a->i[i+1]; k++) { 532 for (l=0; l<bs; l++) { 533 aa[count++] = a->a[bs2*k + l*bs + j]; 534 } 535 } 536 } 537 } 538 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 539 ierr = PetscFree(aa);CHKERRQ(ierr); 540 541 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 542 if (file) { 543 fprintf(file,"-matload_block_size %d\n",a->bs); 544 } 545 PetscFunctionReturn(0); 546 } 547 548 #undef __FUNCT__ 549 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 550 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 551 { 552 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 553 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 554 PetscViewerFormat format; 555 556 PetscFunctionBegin; 557 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 558 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 559 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 560 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 561 Mat aij; 562 ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr); 563 ierr = MatView(aij,viewer);CHKERRQ(ierr); 564 ierr = MatDestroy(aij);CHKERRQ(ierr); 565 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 566 PetscFunctionReturn(0); 567 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 568 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 569 for (i=0; i<a->mbs; i++) { 570 for (j=0; j<bs; j++) { 571 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 572 for (k=a->i[i]; k<a->i[i+1]; k++) { 573 for (l=0; l<bs; l++) { 574 #if defined(PETSC_USE_COMPLEX) 575 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 576 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l, 577 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 578 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 579 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l, 580 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 581 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 582 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 583 } 584 #else 585 if (a->a[bs2*k + l*bs + j] != 0.0) { 586 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 587 } 588 #endif 589 } 590 } 591 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 592 } 593 } 594 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 595 } else { 596 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 597 for (i=0; i<a->mbs; i++) { 598 for (j=0; j<bs; j++) { 599 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 600 for (k=a->i[i]; k<a->i[i+1]; k++) { 601 for (l=0; l<bs; l++) { 602 #if defined(PETSC_USE_COMPLEX) 603 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 604 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 605 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 606 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 607 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 608 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 609 } else { 610 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 611 } 612 #else 613 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 614 #endif 615 } 616 } 617 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 618 } 619 } 620 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 621 } 622 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 623 PetscFunctionReturn(0); 624 } 625 626 #undef __FUNCT__ 627 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 628 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 629 { 630 Mat A = (Mat) Aa; 631 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 632 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 633 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 634 MatScalar *aa; 635 PetscViewer viewer; 636 637 PetscFunctionBegin; 638 639 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 640 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 641 642 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 643 644 /* loop over matrix elements drawing boxes */ 645 color = PETSC_DRAW_BLUE; 646 for (i=0,row=0; i<mbs; i++,row+=bs) { 647 for (j=a->i[i]; j<a->i[i+1]; j++) { 648 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 649 x_l = a->j[j]*bs; x_r = x_l + 1.0; 650 aa = a->a + j*bs2; 651 for (k=0; k<bs; k++) { 652 for (l=0; l<bs; l++) { 653 if (PetscRealPart(*aa++) >= 0.) continue; 654 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 655 } 656 } 657 } 658 } 659 color = PETSC_DRAW_CYAN; 660 for (i=0,row=0; i<mbs; i++,row+=bs) { 661 for (j=a->i[i]; j<a->i[i+1]; j++) { 662 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 663 x_l = a->j[j]*bs; x_r = x_l + 1.0; 664 aa = a->a + j*bs2; 665 for (k=0; k<bs; k++) { 666 for (l=0; l<bs; l++) { 667 if (PetscRealPart(*aa++) != 0.) continue; 668 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 669 } 670 } 671 } 672 } 673 674 color = PETSC_DRAW_RED; 675 for (i=0,row=0; i<mbs; i++,row+=bs) { 676 for (j=a->i[i]; j<a->i[i+1]; j++) { 677 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 678 x_l = a->j[j]*bs; x_r = x_l + 1.0; 679 aa = a->a + j*bs2; 680 for (k=0; k<bs; k++) { 681 for (l=0; l<bs; l++) { 682 if (PetscRealPart(*aa++) <= 0.) continue; 683 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 684 } 685 } 686 } 687 } 688 PetscFunctionReturn(0); 689 } 690 691 #undef __FUNCT__ 692 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 693 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 694 { 695 int ierr; 696 PetscReal xl,yl,xr,yr,w,h; 697 PetscDraw draw; 698 PetscTruth isnull; 699 700 PetscFunctionBegin; 701 702 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 703 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 704 705 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 706 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 707 xr += w; yr += h; xl = -w; yl = -h; 708 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 709 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 710 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 711 PetscFunctionReturn(0); 712 } 713 714 #undef __FUNCT__ 715 #define __FUNCT__ "MatView_SeqBAIJ" 716 int MatView_SeqBAIJ(Mat A,PetscViewer viewer) 717 { 718 int ierr; 719 PetscTruth issocket,isascii,isbinary,isdraw; 720 721 PetscFunctionBegin; 722 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 723 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 724 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 725 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 726 if (issocket) { 727 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 728 } else if (isascii){ 729 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 730 } else if (isbinary) { 731 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 732 } else if (isdraw) { 733 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 734 } else { 735 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 736 } 737 PetscFunctionReturn(0); 738 } 739 740 741 #undef __FUNCT__ 742 #define __FUNCT__ "MatGetValues_SeqBAIJ" 743 int MatGetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[]) 744 { 745 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 746 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 747 int *ai = a->i,*ailen = a->ilen; 748 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 749 MatScalar *ap,*aa = a->a,zero = 0.0; 750 751 PetscFunctionBegin; 752 for (k=0; k<m; k++) { /* loop over rows */ 753 row = im[k]; brow = row/bs; 754 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 755 if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d too large", row); 756 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 757 nrow = ailen[brow]; 758 for (l=0; l<n; l++) { /* loop over columns */ 759 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 760 if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %d too large", in[l]); 761 col = in[l] ; 762 bcol = col/bs; 763 cidx = col%bs; 764 ridx = row%bs; 765 high = nrow; 766 low = 0; /* assume unsorted */ 767 while (high-low > 5) { 768 t = (low+high)/2; 769 if (rp[t] > bcol) high = t; 770 else low = t; 771 } 772 for (i=low; i<high; i++) { 773 if (rp[i] > bcol) break; 774 if (rp[i] == bcol) { 775 *v++ = ap[bs2*i+bs*cidx+ridx]; 776 goto finished; 777 } 778 } 779 *v++ = zero; 780 finished:; 781 } 782 } 783 PetscFunctionReturn(0); 784 } 785 786 #if defined(PETSC_USE_MAT_SINGLE) 787 #undef __FUNCT__ 788 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 789 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 790 { 791 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 792 int ierr,i,N = m*n*b->bs2; 793 MatScalar *vsingle; 794 795 PetscFunctionBegin; 796 if (N > b->setvalueslen) { 797 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 798 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 799 b->setvalueslen = N; 800 } 801 vsingle = b->setvaluescopy; 802 for (i=0; i<N; i++) { 803 vsingle[i] = v[i]; 804 } 805 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 806 PetscFunctionReturn(0); 807 } 808 #endif 809 810 811 #undef __FUNCT__ 812 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 813 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode is) 814 { 815 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 816 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 817 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 818 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 819 PetscTruth roworiented=a->roworiented; 820 const MatScalar *value = v; 821 MatScalar *ap,*aa = a->a,*bap; 822 823 PetscFunctionBegin; 824 if (roworiented) { 825 stepval = (n-1)*bs; 826 } else { 827 stepval = (m-1)*bs; 828 } 829 for (k=0; k<m; k++) { /* loop over added rows */ 830 row = im[k]; 831 if (row < 0) continue; 832 #if defined(PETSC_USE_BOPT_g) 833 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1); 834 #endif 835 rp = aj + ai[row]; 836 ap = aa + bs2*ai[row]; 837 rmax = imax[row]; 838 nrow = ailen[row]; 839 low = 0; 840 for (l=0; l<n; l++) { /* loop over added columns */ 841 if (in[l] < 0) continue; 842 #if defined(PETSC_USE_BOPT_g) 843 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],a->nbs-1); 844 #endif 845 col = in[l]; 846 if (roworiented) { 847 value = v + k*(stepval+bs)*bs + l*bs; 848 } else { 849 value = v + l*(stepval+bs)*bs + k*bs; 850 } 851 if (!sorted) low = 0; high = nrow; 852 while (high-low > 7) { 853 t = (low+high)/2; 854 if (rp[t] > col) high = t; 855 else low = t; 856 } 857 for (i=low; i<high; i++) { 858 if (rp[i] > col) break; 859 if (rp[i] == col) { 860 bap = ap + bs2*i; 861 if (roworiented) { 862 if (is == ADD_VALUES) { 863 for (ii=0; ii<bs; ii++,value+=stepval) { 864 for (jj=ii; jj<bs2; jj+=bs) { 865 bap[jj] += *value++; 866 } 867 } 868 } else { 869 for (ii=0; ii<bs; ii++,value+=stepval) { 870 for (jj=ii; jj<bs2; jj+=bs) { 871 bap[jj] = *value++; 872 } 873 } 874 } 875 } else { 876 if (is == ADD_VALUES) { 877 for (ii=0; ii<bs; ii++,value+=stepval) { 878 for (jj=0; jj<bs; jj++) { 879 *bap++ += *value++; 880 } 881 } 882 } else { 883 for (ii=0; ii<bs; ii++,value+=stepval) { 884 for (jj=0; jj<bs; jj++) { 885 *bap++ = *value++; 886 } 887 } 888 } 889 } 890 goto noinsert2; 891 } 892 } 893 if (nonew == 1) goto noinsert2; 894 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 895 if (nrow >= rmax) { 896 /* there is no extra room in row, therefore enlarge */ 897 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 898 MatScalar *new_a; 899 900 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 901 902 /* malloc new storage space */ 903 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 904 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 905 new_j = (int*)(new_a + bs2*new_nz); 906 new_i = new_j + new_nz; 907 908 /* copy over old data into new slots */ 909 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 910 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 911 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 912 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 913 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 914 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 915 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 916 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 917 /* free up old matrix storage */ 918 ierr = PetscFree(a->a);CHKERRQ(ierr); 919 if (!a->singlemalloc) { 920 ierr = PetscFree(a->i);CHKERRQ(ierr); 921 ierr = PetscFree(a->j);CHKERRQ(ierr); 922 } 923 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 924 a->singlemalloc = PETSC_TRUE; 925 926 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 927 rmax = imax[row] = imax[row] + CHUNKSIZE; 928 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 929 a->maxnz += bs2*CHUNKSIZE; 930 a->reallocs++; 931 a->nz++; 932 } 933 N = nrow++ - 1; 934 /* shift up all the later entries in this row */ 935 for (ii=N; ii>=i; ii--) { 936 rp[ii+1] = rp[ii]; 937 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 938 } 939 if (N >= i) { 940 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 941 } 942 rp[i] = col; 943 bap = ap + bs2*i; 944 if (roworiented) { 945 for (ii=0; ii<bs; ii++,value+=stepval) { 946 for (jj=ii; jj<bs2; jj+=bs) { 947 bap[jj] = *value++; 948 } 949 } 950 } else { 951 for (ii=0; ii<bs; ii++,value+=stepval) { 952 for (jj=0; jj<bs; jj++) { 953 *bap++ = *value++; 954 } 955 } 956 } 957 noinsert2:; 958 low = i; 959 } 960 ailen[row] = nrow; 961 } 962 PetscFunctionReturn(0); 963 } 964 965 #undef __FUNCT__ 966 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 967 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 968 { 969 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 970 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 971 int m = A->m,*ip,N,*ailen = a->ilen; 972 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 973 MatScalar *aa = a->a,*ap; 974 975 PetscFunctionBegin; 976 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 977 978 if (m) rmax = ailen[0]; 979 for (i=1; i<mbs; i++) { 980 /* move each row back by the amount of empty slots (fshift) before it*/ 981 fshift += imax[i-1] - ailen[i-1]; 982 rmax = PetscMax(rmax,ailen[i]); 983 if (fshift) { 984 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 985 N = ailen[i]; 986 for (j=0; j<N; j++) { 987 ip[j-fshift] = ip[j]; 988 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 989 } 990 } 991 ai[i] = ai[i-1] + ailen[i-1]; 992 } 993 if (mbs) { 994 fshift += imax[mbs-1] - ailen[mbs-1]; 995 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 996 } 997 /* reset ilen and imax for each row */ 998 for (i=0; i<mbs; i++) { 999 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1000 } 1001 a->nz = ai[mbs]; 1002 1003 /* diagonals may have moved, so kill the diagonal pointers */ 1004 if (fshift && a->diag) { 1005 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1006 PetscLogObjectMemory(A,-(mbs+1)*sizeof(int)); 1007 a->diag = 0; 1008 } 1009 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2); 1010 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs); 1011 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 1012 a->reallocs = 0; 1013 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1014 1015 PetscFunctionReturn(0); 1016 } 1017 1018 1019 1020 /* 1021 This function returns an array of flags which indicate the locations of contiguous 1022 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1023 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1024 Assume: sizes should be long enough to hold all the values. 1025 */ 1026 #undef __FUNCT__ 1027 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1028 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 1029 { 1030 int i,j,k,row; 1031 PetscTruth flg; 1032 1033 PetscFunctionBegin; 1034 for (i=0,j=0; i<n; j++) { 1035 row = idx[i]; 1036 if (row%bs!=0) { /* Not the begining of a block */ 1037 sizes[j] = 1; 1038 i++; 1039 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1040 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1041 i++; 1042 } else { /* Begining of the block, so check if the complete block exists */ 1043 flg = PETSC_TRUE; 1044 for (k=1; k<bs; k++) { 1045 if (row+k != idx[i+k]) { /* break in the block */ 1046 flg = PETSC_FALSE; 1047 break; 1048 } 1049 } 1050 if (flg == PETSC_TRUE) { /* No break in the bs */ 1051 sizes[j] = bs; 1052 i+= bs; 1053 } else { 1054 sizes[j] = 1; 1055 i++; 1056 } 1057 } 1058 } 1059 *bs_max = j; 1060 PetscFunctionReturn(0); 1061 } 1062 1063 #undef __FUNCT__ 1064 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1065 int MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag) 1066 { 1067 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1068 int ierr,i,j,k,count,is_n,*is_idx,*rows; 1069 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 1070 PetscScalar zero = 0.0; 1071 MatScalar *aa; 1072 1073 PetscFunctionBegin; 1074 /* Make a copy of the IS and sort it */ 1075 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 1076 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1077 1078 /* allocate memory for rows,sizes */ 1079 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 1080 sizes = rows + is_n; 1081 1082 /* copy IS values to rows, and sort them */ 1083 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1084 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1085 if (baij->keepzeroedrows) { 1086 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1087 bs_max = is_n; 1088 } else { 1089 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1090 } 1091 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 1092 1093 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1094 row = rows[j]; 1095 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 1096 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1097 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1098 if (sizes[i] == bs && !baij->keepzeroedrows) { 1099 if (diag) { 1100 if (baij->ilen[row/bs] > 0) { 1101 baij->ilen[row/bs] = 1; 1102 baij->j[baij->i[row/bs]] = row/bs; 1103 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1104 } 1105 /* Now insert all the diagonal values for this bs */ 1106 for (k=0; k<bs; k++) { 1107 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 1108 } 1109 } else { /* (!diag) */ 1110 baij->ilen[row/bs] = 0; 1111 } /* end (!diag) */ 1112 } else { /* (sizes[i] != bs) */ 1113 #if defined (PETSC_USE_DEBUG) 1114 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 1115 #endif 1116 for (k=0; k<count; k++) { 1117 aa[0] = zero; 1118 aa += bs; 1119 } 1120 if (diag) { 1121 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1122 } 1123 } 1124 } 1125 1126 ierr = PetscFree(rows);CHKERRQ(ierr); 1127 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1128 PetscFunctionReturn(0); 1129 } 1130 1131 #undef __FUNCT__ 1132 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1133 int MatSetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 1134 { 1135 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1136 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1137 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1138 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1139 int ridx,cidx,bs2=a->bs2,ierr; 1140 PetscTruth roworiented=a->roworiented; 1141 MatScalar *ap,value,*aa=a->a,*bap; 1142 1143 PetscFunctionBegin; 1144 for (k=0; k<m; k++) { /* loop over added rows */ 1145 row = im[k]; brow = row/bs; 1146 if (row < 0) continue; 1147 #if defined(PETSC_USE_BOPT_g) 1148 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1); 1149 #endif 1150 rp = aj + ai[brow]; 1151 ap = aa + bs2*ai[brow]; 1152 rmax = imax[brow]; 1153 nrow = ailen[brow]; 1154 low = 0; 1155 for (l=0; l<n; l++) { /* loop over added columns */ 1156 if (in[l] < 0) continue; 1157 #if defined(PETSC_USE_BOPT_g) 1158 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1); 1159 #endif 1160 col = in[l]; bcol = col/bs; 1161 ridx = row % bs; cidx = col % bs; 1162 if (roworiented) { 1163 value = v[l + k*n]; 1164 } else { 1165 value = v[k + l*m]; 1166 } 1167 if (!sorted) low = 0; high = nrow; 1168 while (high-low > 7) { 1169 t = (low+high)/2; 1170 if (rp[t] > bcol) high = t; 1171 else low = t; 1172 } 1173 for (i=low; i<high; i++) { 1174 if (rp[i] > bcol) break; 1175 if (rp[i] == bcol) { 1176 bap = ap + bs2*i + bs*cidx + ridx; 1177 if (is == ADD_VALUES) *bap += value; 1178 else *bap = value; 1179 goto noinsert1; 1180 } 1181 } 1182 if (nonew == 1) goto noinsert1; 1183 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 1184 if (nrow >= rmax) { 1185 /* there is no extra room in row, therefore enlarge */ 1186 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1187 MatScalar *new_a; 1188 1189 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 1190 1191 /* Malloc new storage space */ 1192 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1193 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1194 new_j = (int*)(new_a + bs2*new_nz); 1195 new_i = new_j + new_nz; 1196 1197 /* copy over old data into new slots */ 1198 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1199 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1200 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1201 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1202 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1203 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1204 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1205 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1206 /* free up old matrix storage */ 1207 ierr = PetscFree(a->a);CHKERRQ(ierr); 1208 if (!a->singlemalloc) { 1209 ierr = PetscFree(a->i);CHKERRQ(ierr); 1210 ierr = PetscFree(a->j);CHKERRQ(ierr); 1211 } 1212 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1213 a->singlemalloc = PETSC_TRUE; 1214 1215 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1216 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1217 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1218 a->maxnz += bs2*CHUNKSIZE; 1219 a->reallocs++; 1220 a->nz++; 1221 } 1222 N = nrow++ - 1; 1223 /* shift up all the later entries in this row */ 1224 for (ii=N; ii>=i; ii--) { 1225 rp[ii+1] = rp[ii]; 1226 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1227 } 1228 if (N>=i) { 1229 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1230 } 1231 rp[i] = bcol; 1232 ap[bs2*i + bs*cidx + ridx] = value; 1233 noinsert1:; 1234 low = i; 1235 } 1236 ailen[brow] = nrow; 1237 } 1238 PetscFunctionReturn(0); 1239 } 1240 1241 1242 #undef __FUNCT__ 1243 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1244 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1245 { 1246 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1247 Mat outA; 1248 int ierr; 1249 PetscTruth row_identity,col_identity; 1250 1251 PetscFunctionBegin; 1252 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1253 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1254 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1255 if (!row_identity || !col_identity) { 1256 SETERRQ(1,"Row and column permutations must be identity for in-place ILU"); 1257 } 1258 1259 outA = inA; 1260 inA->factor = FACTOR_LU; 1261 1262 if (!a->diag) { 1263 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1264 } 1265 1266 a->row = row; 1267 a->col = col; 1268 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1269 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1270 1271 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1272 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1273 PetscLogObjectParent(inA,a->icol); 1274 1275 /* 1276 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1277 for ILU(0) factorization with natural ordering 1278 */ 1279 if (a->bs < 8) { 1280 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1281 } else { 1282 if (!a->solve_work) { 1283 ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1284 PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar)); 1285 } 1286 } 1287 1288 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1289 1290 PetscFunctionReturn(0); 1291 } 1292 #undef __FUNCT__ 1293 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1294 int MatPrintHelp_SeqBAIJ(Mat A) 1295 { 1296 static PetscTruth called = PETSC_FALSE; 1297 MPI_Comm comm = A->comm; 1298 int ierr; 1299 1300 PetscFunctionBegin; 1301 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1302 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1303 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1304 PetscFunctionReturn(0); 1305 } 1306 1307 EXTERN_C_BEGIN 1308 #undef __FUNCT__ 1309 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1310 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1311 { 1312 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1313 int i,nz,nbs; 1314 1315 PetscFunctionBegin; 1316 nz = baij->maxnz/baij->bs2; 1317 nbs = baij->nbs; 1318 for (i=0; i<nz; i++) { 1319 baij->j[i] = indices[i]; 1320 } 1321 baij->nz = nz; 1322 for (i=0; i<nbs; i++) { 1323 baij->ilen[i] = baij->imax[i]; 1324 } 1325 1326 PetscFunctionReturn(0); 1327 } 1328 EXTERN_C_END 1329 1330 #undef __FUNCT__ 1331 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1332 /*@ 1333 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1334 in the matrix. 1335 1336 Input Parameters: 1337 + mat - the SeqBAIJ matrix 1338 - indices - the column indices 1339 1340 Level: advanced 1341 1342 Notes: 1343 This can be called if you have precomputed the nonzero structure of the 1344 matrix and want to provide it to the matrix object to improve the performance 1345 of the MatSetValues() operation. 1346 1347 You MUST have set the correct numbers of nonzeros per row in the call to 1348 MatCreateSeqBAIJ(). 1349 1350 MUST be called before any calls to MatSetValues(); 1351 1352 @*/ 1353 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1354 { 1355 int ierr,(*f)(Mat,int *); 1356 1357 PetscFunctionBegin; 1358 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1359 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1360 if (f) { 1361 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1362 } else { 1363 SETERRQ(1,"Wrong type of matrix to set column indices"); 1364 } 1365 PetscFunctionReturn(0); 1366 } 1367 1368 #undef __FUNCT__ 1369 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1370 int MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1371 { 1372 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1373 int ierr,i,j,n,row,bs,*ai,*aj,mbs; 1374 PetscReal atmp; 1375 PetscScalar *x,zero = 0.0; 1376 MatScalar *aa; 1377 int ncols,brow,krow,kcol; 1378 1379 PetscFunctionBegin; 1380 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1381 bs = a->bs; 1382 aa = a->a; 1383 ai = a->i; 1384 aj = a->j; 1385 mbs = a->mbs; 1386 1387 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1388 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1389 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1390 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1391 for (i=0; i<mbs; i++) { 1392 ncols = ai[1] - ai[0]; ai++; 1393 brow = bs*i; 1394 for (j=0; j<ncols; j++){ 1395 /* bcol = bs*(*aj); */ 1396 for (kcol=0; kcol<bs; kcol++){ 1397 for (krow=0; krow<bs; krow++){ 1398 atmp = PetscAbsScalar(*aa); aa++; 1399 row = brow + krow; /* row index */ 1400 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1401 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1402 } 1403 } 1404 aj++; 1405 } 1406 } 1407 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1408 PetscFunctionReturn(0); 1409 } 1410 1411 #undef __FUNCT__ 1412 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1413 int MatSetUpPreallocation_SeqBAIJ(Mat A) 1414 { 1415 int ierr; 1416 1417 PetscFunctionBegin; 1418 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1419 PetscFunctionReturn(0); 1420 } 1421 1422 #undef __FUNCT__ 1423 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1424 int MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1425 { 1426 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1427 PetscFunctionBegin; 1428 *array = a->a; 1429 PetscFunctionReturn(0); 1430 } 1431 1432 #undef __FUNCT__ 1433 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1434 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1435 { 1436 PetscFunctionBegin; 1437 PetscFunctionReturn(0); 1438 } 1439 1440 #include "petscblaslapack.h" 1441 #undef __FUNCT__ 1442 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1443 int MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1444 { 1445 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1446 int ierr,one=1,i,bs=y->bs,j,bs2; 1447 1448 PetscFunctionBegin; 1449 if (str == SAME_NONZERO_PATTERN) { 1450 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1451 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1452 if (y->xtoy && y->XtoY != X) { 1453 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1454 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1455 } 1456 if (!y->xtoy) { /* get xtoy */ 1457 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1458 y->XtoY = X; 1459 } 1460 bs2 = bs*bs; 1461 for (i=0; i<x->nz; i++) { 1462 j = 0; 1463 while (j < bs2){ 1464 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1465 j++; 1466 } 1467 } 1468 PetscLogInfo(0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)); 1469 } else { 1470 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1471 } 1472 PetscFunctionReturn(0); 1473 } 1474 1475 /* -------------------------------------------------------------------*/ 1476 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1477 MatGetRow_SeqBAIJ, 1478 MatRestoreRow_SeqBAIJ, 1479 MatMult_SeqBAIJ_N, 1480 /* 4*/ MatMultAdd_SeqBAIJ_N, 1481 MatMultTranspose_SeqBAIJ, 1482 MatMultTransposeAdd_SeqBAIJ, 1483 MatSolve_SeqBAIJ_N, 1484 0, 1485 0, 1486 /*10*/ 0, 1487 MatLUFactor_SeqBAIJ, 1488 0, 1489 0, 1490 MatTranspose_SeqBAIJ, 1491 /*15*/ MatGetInfo_SeqBAIJ, 1492 MatEqual_SeqBAIJ, 1493 MatGetDiagonal_SeqBAIJ, 1494 MatDiagonalScale_SeqBAIJ, 1495 MatNorm_SeqBAIJ, 1496 /*20*/ 0, 1497 MatAssemblyEnd_SeqBAIJ, 1498 0, 1499 MatSetOption_SeqBAIJ, 1500 MatZeroEntries_SeqBAIJ, 1501 /*25*/ MatZeroRows_SeqBAIJ, 1502 MatLUFactorSymbolic_SeqBAIJ, 1503 MatLUFactorNumeric_SeqBAIJ_N, 1504 0, 1505 0, 1506 /*30*/ MatSetUpPreallocation_SeqBAIJ, 1507 MatILUFactorSymbolic_SeqBAIJ, 1508 0, 1509 MatGetArray_SeqBAIJ, 1510 MatRestoreArray_SeqBAIJ, 1511 /*35*/ MatDuplicate_SeqBAIJ, 1512 0, 1513 0, 1514 MatILUFactor_SeqBAIJ, 1515 0, 1516 /*40*/ MatAXPY_SeqBAIJ, 1517 MatGetSubMatrices_SeqBAIJ, 1518 MatIncreaseOverlap_SeqBAIJ, 1519 MatGetValues_SeqBAIJ, 1520 0, 1521 /*45*/ MatPrintHelp_SeqBAIJ, 1522 MatScale_SeqBAIJ, 1523 0, 1524 0, 1525 0, 1526 /*50*/ MatGetBlockSize_SeqBAIJ, 1527 MatGetRowIJ_SeqBAIJ, 1528 MatRestoreRowIJ_SeqBAIJ, 1529 0, 1530 0, 1531 /*55*/ 0, 1532 0, 1533 0, 1534 0, 1535 MatSetValuesBlocked_SeqBAIJ, 1536 /*60*/ MatGetSubMatrix_SeqBAIJ, 1537 MatDestroy_SeqBAIJ, 1538 MatView_SeqBAIJ, 1539 MatGetPetscMaps_Petsc, 1540 0, 1541 /*65*/ 0, 1542 0, 1543 0, 1544 0, 1545 0, 1546 /*70*/ MatGetRowMax_SeqBAIJ, 1547 MatConvert_Basic, 1548 0, 1549 0, 1550 0, 1551 /*75*/ 0, 1552 0, 1553 0, 1554 0, 1555 0, 1556 /*80*/ 0, 1557 0, 1558 0, 1559 0, 1560 0, 1561 /*85*/ MatLoad_SeqBAIJ 1562 }; 1563 1564 EXTERN_C_BEGIN 1565 #undef __FUNCT__ 1566 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 1567 int MatStoreValues_SeqBAIJ(Mat mat) 1568 { 1569 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1570 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1571 int ierr; 1572 1573 PetscFunctionBegin; 1574 if (aij->nonew != 1) { 1575 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1576 } 1577 1578 /* allocate space for values if not already there */ 1579 if (!aij->saved_values) { 1580 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1581 } 1582 1583 /* copy values over */ 1584 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1585 PetscFunctionReturn(0); 1586 } 1587 EXTERN_C_END 1588 1589 EXTERN_C_BEGIN 1590 #undef __FUNCT__ 1591 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 1592 int MatRetrieveValues_SeqBAIJ(Mat mat) 1593 { 1594 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1595 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1596 1597 PetscFunctionBegin; 1598 if (aij->nonew != 1) { 1599 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1600 } 1601 if (!aij->saved_values) { 1602 SETERRQ(1,"Must call MatStoreValues(A);first"); 1603 } 1604 1605 /* copy values over */ 1606 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1607 PetscFunctionReturn(0); 1608 } 1609 EXTERN_C_END 1610 1611 EXTERN_C_BEGIN 1612 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,Mat*); 1613 EXTERN_C_END 1614 1615 EXTERN_C_BEGIN 1616 #undef __FUNCT__ 1617 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 1618 int MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,int bs,int nz,int *nnz) 1619 { 1620 Mat_SeqBAIJ *b; 1621 int i,len,ierr,mbs,nbs,bs2,newbs = bs; 1622 PetscTruth flg; 1623 1624 PetscFunctionBegin; 1625 1626 B->preallocated = PETSC_TRUE; 1627 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 1628 if (nnz && newbs != bs) { 1629 SETERRQ(1,"Cannot change blocksize from command line if setting nnz"); 1630 } 1631 bs = newbs; 1632 1633 mbs = B->m/bs; 1634 nbs = B->n/bs; 1635 bs2 = bs*bs; 1636 1637 if (mbs*bs!=B->m || nbs*bs!=B->n) { 1638 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 1639 } 1640 1641 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1642 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1643 if (nnz) { 1644 for (i=0; i<mbs; i++) { 1645 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1646 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 1647 } 1648 } 1649 1650 b = (Mat_SeqBAIJ*)B->data; 1651 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1652 B->ops->solve = MatSolve_SeqBAIJ_Update; 1653 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 1654 if (!flg) { 1655 switch (bs) { 1656 case 1: 1657 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1658 B->ops->mult = MatMult_SeqBAIJ_1; 1659 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 1660 break; 1661 case 2: 1662 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1663 B->ops->mult = MatMult_SeqBAIJ_2; 1664 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 1665 break; 1666 case 3: 1667 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1668 B->ops->mult = MatMult_SeqBAIJ_3; 1669 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 1670 break; 1671 case 4: 1672 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1673 B->ops->mult = MatMult_SeqBAIJ_4; 1674 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 1675 break; 1676 case 5: 1677 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1678 B->ops->mult = MatMult_SeqBAIJ_5; 1679 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 1680 break; 1681 case 6: 1682 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 1683 B->ops->mult = MatMult_SeqBAIJ_6; 1684 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 1685 break; 1686 case 7: 1687 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 1688 B->ops->mult = MatMult_SeqBAIJ_7; 1689 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 1690 break; 1691 default: 1692 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 1693 B->ops->mult = MatMult_SeqBAIJ_N; 1694 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 1695 break; 1696 } 1697 } 1698 b->bs = bs; 1699 b->mbs = mbs; 1700 b->nbs = nbs; 1701 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1702 if (!nnz) { 1703 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1704 else if (nz <= 0) nz = 1; 1705 for (i=0; i<mbs; i++) b->imax[i] = nz; 1706 nz = nz*mbs; 1707 } else { 1708 nz = 0; 1709 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1710 } 1711 1712 /* allocate the matrix space */ 1713 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1714 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1715 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1716 b->j = (int*)(b->a + nz*bs2); 1717 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 1718 b->i = b->j + nz; 1719 b->singlemalloc = PETSC_TRUE; 1720 1721 b->i[0] = 0; 1722 for (i=1; i<mbs+1; i++) { 1723 b->i[i] = b->i[i-1] + b->imax[i-1]; 1724 } 1725 1726 /* b->ilen will count nonzeros in each block row so far. */ 1727 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1728 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1729 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1730 1731 b->bs = bs; 1732 b->bs2 = bs2; 1733 b->mbs = mbs; 1734 b->nz = 0; 1735 b->maxnz = nz*bs2; 1736 B->info.nz_unneeded = (PetscReal)b->maxnz; 1737 PetscFunctionReturn(0); 1738 } 1739 EXTERN_C_END 1740 1741 /*MC 1742 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 1743 block sparse compressed row format. 1744 1745 Options Database Keys: 1746 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 1747 1748 Level: beginner 1749 1750 .seealso: MatCreateSeqBAIJ 1751 M*/ 1752 1753 EXTERN_C_BEGIN 1754 #undef __FUNCT__ 1755 #define __FUNCT__ "MatCreate_SeqBAIJ" 1756 int MatCreate_SeqBAIJ(Mat B) 1757 { 1758 int ierr,size; 1759 Mat_SeqBAIJ *b; 1760 1761 PetscFunctionBegin; 1762 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1763 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1764 1765 B->m = B->M = PetscMax(B->m,B->M); 1766 B->n = B->N = PetscMax(B->n,B->N); 1767 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 1768 B->data = (void*)b; 1769 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1770 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1771 B->factor = 0; 1772 B->lupivotthreshold = 1.0; 1773 B->mapping = 0; 1774 b->row = 0; 1775 b->col = 0; 1776 b->icol = 0; 1777 b->reallocs = 0; 1778 b->saved_values = 0; 1779 #if defined(PETSC_USE_MAT_SINGLE) 1780 b->setvalueslen = 0; 1781 b->setvaluescopy = PETSC_NULL; 1782 #endif 1783 1784 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 1785 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 1786 1787 b->sorted = PETSC_FALSE; 1788 b->roworiented = PETSC_TRUE; 1789 b->nonew = 0; 1790 b->diag = 0; 1791 b->solve_work = 0; 1792 b->mult_work = 0; 1793 B->spptr = 0; 1794 B->info.nz_unneeded = (PetscReal)b->maxnz; 1795 b->keepzeroedrows = PETSC_FALSE; 1796 b->xtoy = 0; 1797 b->XtoY = 0; 1798 1799 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1800 "MatStoreValues_SeqBAIJ", 1801 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1802 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1803 "MatRetrieveValues_SeqBAIJ", 1804 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1805 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1806 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1807 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1808 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 1809 "MatConvert_SeqBAIJ_SeqAIJ", 1810 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 1811 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 1812 "MatSeqBAIJSetPreallocation_SeqBAIJ", 1813 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 1814 PetscFunctionReturn(0); 1815 } 1816 EXTERN_C_END 1817 1818 #undef __FUNCT__ 1819 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 1820 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1821 { 1822 Mat C; 1823 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 1824 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1825 1826 PetscFunctionBegin; 1827 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1828 1829 *B = 0; 1830 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1831 ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr); 1832 c = (Mat_SeqBAIJ*)C->data; 1833 1834 c->bs = a->bs; 1835 c->bs2 = a->bs2; 1836 c->mbs = a->mbs; 1837 c->nbs = a->nbs; 1838 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1839 1840 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1841 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1842 for (i=0; i<mbs; i++) { 1843 c->imax[i] = a->imax[i]; 1844 c->ilen[i] = a->ilen[i]; 1845 } 1846 1847 /* allocate the matrix space */ 1848 c->singlemalloc = PETSC_TRUE; 1849 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1850 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1851 c->j = (int*)(c->a + nz*bs2); 1852 c->i = c->j + nz; 1853 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1854 if (mbs > 0) { 1855 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1856 if (cpvalues == MAT_COPY_VALUES) { 1857 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1858 } else { 1859 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1860 } 1861 } 1862 1863 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1864 c->sorted = a->sorted; 1865 c->roworiented = a->roworiented; 1866 c->nonew = a->nonew; 1867 1868 if (a->diag) { 1869 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1870 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1871 for (i=0; i<mbs; i++) { 1872 c->diag[i] = a->diag[i]; 1873 } 1874 } else c->diag = 0; 1875 c->nz = a->nz; 1876 c->maxnz = a->maxnz; 1877 c->solve_work = 0; 1878 C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1879 c->mult_work = 0; 1880 C->preallocated = PETSC_TRUE; 1881 C->assembled = PETSC_TRUE; 1882 *B = C; 1883 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1884 PetscFunctionReturn(0); 1885 } 1886 1887 #undef __FUNCT__ 1888 #define __FUNCT__ "MatLoad_SeqBAIJ" 1889 int MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A) 1890 { 1891 Mat_SeqBAIJ *a; 1892 Mat B; 1893 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1894 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1895 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1896 int *masked,nmask,tmp,bs2,ishift; 1897 PetscScalar *aa; 1898 MPI_Comm comm = ((PetscObject)viewer)->comm; 1899 1900 PetscFunctionBegin; 1901 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1902 bs2 = bs*bs; 1903 1904 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1905 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1906 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1907 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1908 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1909 M = header[1]; N = header[2]; nz = header[3]; 1910 1911 if (header[3] < 0) { 1912 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 1913 } 1914 1915 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1916 1917 /* 1918 This code adds extra rows to make sure the number of rows is 1919 divisible by the blocksize 1920 */ 1921 mbs = M/bs; 1922 extra_rows = bs - M + bs*(mbs); 1923 if (extra_rows == bs) extra_rows = 0; 1924 else mbs++; 1925 if (extra_rows) { 1926 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1927 } 1928 1929 /* read in row lengths */ 1930 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1931 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1932 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1933 1934 /* read in column indices */ 1935 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1936 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1937 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1938 1939 /* loop over row lengths determining block row lengths */ 1940 ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 1941 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1942 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1943 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1944 masked = mask + mbs; 1945 rowcount = 0; nzcount = 0; 1946 for (i=0; i<mbs; i++) { 1947 nmask = 0; 1948 for (j=0; j<bs; j++) { 1949 kmax = rowlengths[rowcount]; 1950 for (k=0; k<kmax; k++) { 1951 tmp = jj[nzcount++]/bs; 1952 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1953 } 1954 rowcount++; 1955 } 1956 browlengths[i] += nmask; 1957 /* zero out the mask elements we set */ 1958 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1959 } 1960 1961 /* create our matrix */ 1962 ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B); 1963 ierr = MatSetType(B,type);CHKERRQ(ierr); 1964 ierr = MatSeqBAIJSetPreallocation(B,bs,0,browlengths);CHKERRQ(ierr); 1965 a = (Mat_SeqBAIJ*)B->data; 1966 1967 /* set matrix "i" values */ 1968 a->i[0] = 0; 1969 for (i=1; i<= mbs; i++) { 1970 a->i[i] = a->i[i-1] + browlengths[i-1]; 1971 a->ilen[i-1] = browlengths[i-1]; 1972 } 1973 a->nz = 0; 1974 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 1975 1976 /* read in nonzero values */ 1977 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1978 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1979 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1980 1981 /* set "a" and "j" values into matrix */ 1982 nzcount = 0; jcount = 0; 1983 for (i=0; i<mbs; i++) { 1984 nzcountb = nzcount; 1985 nmask = 0; 1986 for (j=0; j<bs; j++) { 1987 kmax = rowlengths[i*bs+j]; 1988 for (k=0; k<kmax; k++) { 1989 tmp = jj[nzcount++]/bs; 1990 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1991 } 1992 } 1993 /* sort the masked values */ 1994 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1995 1996 /* set "j" values into matrix */ 1997 maskcount = 1; 1998 for (j=0; j<nmask; j++) { 1999 a->j[jcount++] = masked[j]; 2000 mask[masked[j]] = maskcount++; 2001 } 2002 /* set "a" values into matrix */ 2003 ishift = bs2*a->i[i]; 2004 for (j=0; j<bs; j++) { 2005 kmax = rowlengths[i*bs+j]; 2006 for (k=0; k<kmax; k++) { 2007 tmp = jj[nzcountb]/bs ; 2008 block = mask[tmp] - 1; 2009 point = jj[nzcountb] - bs*tmp; 2010 idx = ishift + bs2*block + j + bs*point; 2011 a->a[idx] = (MatScalar)aa[nzcountb++]; 2012 } 2013 } 2014 /* zero out the mask elements we set */ 2015 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2016 } 2017 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2018 2019 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2020 ierr = PetscFree(browlengths);CHKERRQ(ierr); 2021 ierr = PetscFree(aa);CHKERRQ(ierr); 2022 ierr = PetscFree(jj);CHKERRQ(ierr); 2023 ierr = PetscFree(mask);CHKERRQ(ierr); 2024 2025 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2026 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2027 ierr = MatView_Private(B);CHKERRQ(ierr); 2028 2029 *A = B; 2030 PetscFunctionReturn(0); 2031 } 2032 2033 #undef __FUNCT__ 2034 #define __FUNCT__ "MatCreateSeqBAIJ" 2035 /*@C 2036 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2037 compressed row) format. For good matrix assembly performance the 2038 user should preallocate the matrix storage by setting the parameter nz 2039 (or the array nnz). By setting these parameters accurately, performance 2040 during matrix assembly can be increased by more than a factor of 50. 2041 2042 Collective on MPI_Comm 2043 2044 Input Parameters: 2045 + comm - MPI communicator, set to PETSC_COMM_SELF 2046 . bs - size of block 2047 . m - number of rows 2048 . n - number of columns 2049 . nz - number of nonzero blocks per block row (same for all rows) 2050 - nnz - array containing the number of nonzero blocks in the various block rows 2051 (possibly different for each block row) or PETSC_NULL 2052 2053 Output Parameter: 2054 . A - the matrix 2055 2056 Options Database Keys: 2057 . -mat_no_unroll - uses code that does not unroll the loops in the 2058 block calculations (much slower) 2059 . -mat_block_size - size of the blocks to use 2060 2061 Level: intermediate 2062 2063 Notes: 2064 A nonzero block is any block that as 1 or more nonzeros in it 2065 2066 The block AIJ format is fully compatible with standard Fortran 77 2067 storage. That is, the stored row and column indices can begin at 2068 either one (as in Fortran) or zero. See the users' manual for details. 2069 2070 Specify the preallocated storage with either nz or nnz (not both). 2071 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2072 allocation. For additional details, see the users manual chapter on 2073 matrices. 2074 2075 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2076 @*/ 2077 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A) 2078 { 2079 int ierr; 2080 2081 PetscFunctionBegin; 2082 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 2083 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2084 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 2085 PetscFunctionReturn(0); 2086 } 2087 2088 #undef __FUNCT__ 2089 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2090 /*@C 2091 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2092 per row in the matrix. For good matrix assembly performance the 2093 user should preallocate the matrix storage by setting the parameter nz 2094 (or the array nnz). By setting these parameters accurately, performance 2095 during matrix assembly can be increased by more than a factor of 50. 2096 2097 Collective on MPI_Comm 2098 2099 Input Parameters: 2100 + A - the matrix 2101 . bs - size of block 2102 . nz - number of block nonzeros per block row (same for all rows) 2103 - nnz - array containing the number of block nonzeros in the various block rows 2104 (possibly different for each block row) or PETSC_NULL 2105 2106 Options Database Keys: 2107 . -mat_no_unroll - uses code that does not unroll the loops in the 2108 block calculations (much slower) 2109 . -mat_block_size - size of the blocks to use 2110 2111 Level: intermediate 2112 2113 Notes: 2114 The block AIJ format is fully compatible with standard Fortran 77 2115 storage. That is, the stored row and column indices can begin at 2116 either one (as in Fortran) or zero. See the users' manual for details. 2117 2118 Specify the preallocated storage with either nz or nnz (not both). 2119 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2120 allocation. For additional details, see the users manual chapter on 2121 matrices. 2122 2123 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2124 @*/ 2125 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) 2126 { 2127 int ierr,(*f)(Mat,int,int,const int[]); 2128 2129 PetscFunctionBegin; 2130 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2131 if (f) { 2132 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2133 } 2134 PetscFunctionReturn(0); 2135 } 2136