1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 #include "petscsys.h" /*I "petscmat.h" I*/ 11 12 /* 13 Special version for Fun3d sequential benchmark 14 */ 15 #if defined(PETSC_HAVE_FORTRAN_CAPS) 16 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 17 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 18 #define matsetvaluesblocked4_ matsetvaluesblocked4 19 #endif 20 21 EXTERN_C_BEGIN 22 #undef __FUNCT__ 23 #define __FUNCT__ "matsetvaluesblocked4_" 24 void matsetvaluesblocked4_(Mat *AA,int *mm,const int im[],int *nn,const int in[],const PetscScalar v[]) 25 { 26 Mat A = *AA; 27 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 28 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 29 int *ai=a->i,*ailen=a->ilen; 30 int *aj=a->j,stepval; 31 const PetscScalar *value = v; 32 MatScalar *ap,*aa = a->a,*bap; 33 34 PetscFunctionBegin; 35 stepval = (n-1)*4; 36 for (k=0; k<m; k++) { /* loop over added rows */ 37 row = im[k]; 38 rp = aj + ai[row]; 39 ap = aa + 16*ai[row]; 40 nrow = ailen[row]; 41 low = 0; 42 for (l=0; l<n; l++) { /* loop over added columns */ 43 col = in[l]; 44 value = v + k*(stepval+4)*4 + l*4; 45 low = 0; high = nrow; 46 while (high-low > 7) { 47 t = (low+high)/2; 48 if (rp[t] > col) high = t; 49 else low = t; 50 } 51 for (i=low; i<high; i++) { 52 if (rp[i] > col) break; 53 if (rp[i] == col) { 54 bap = ap + 16*i; 55 for (ii=0; ii<4; ii++,value+=stepval) { 56 for (jj=ii; jj<16; jj+=4) { 57 bap[jj] += *value++; 58 } 59 } 60 goto noinsert2; 61 } 62 } 63 N = nrow++ - 1; 64 /* shift up all the later entries in this row */ 65 for (ii=N; ii>=i; ii--) { 66 rp[ii+1] = rp[ii]; 67 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 68 } 69 if (N >= i) { 70 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 71 } 72 rp[i] = col; 73 bap = ap + 16*i; 74 for (ii=0; ii<4; ii++,value+=stepval) { 75 for (jj=ii; jj<16; jj+=4) { 76 bap[jj] = *value++; 77 } 78 } 79 noinsert2:; 80 low = i; 81 } 82 ailen[row] = nrow; 83 } 84 } 85 EXTERN_C_END 86 87 #if defined(PETSC_HAVE_FORTRAN_CAPS) 88 #define matsetvalues4_ MATSETVALUES4 89 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 90 #define matsetvalues4_ matsetvalues4 91 #endif 92 93 EXTERN_C_BEGIN 94 #undef __FUNCT__ 95 #define __FUNCT__ "MatSetValues4_" 96 void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v) 97 { 98 Mat A = *AA; 99 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 100 int *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 101 int *ai=a->i,*ailen=a->ilen; 102 int *aj=a->j,brow,bcol; 103 int ridx,cidx; 104 MatScalar *ap,value,*aa=a->a,*bap; 105 106 PetscFunctionBegin; 107 for (k=0; k<m; k++) { /* loop over added rows */ 108 row = im[k]; brow = row/4; 109 rp = aj + ai[brow]; 110 ap = aa + 16*ai[brow]; 111 nrow = ailen[brow]; 112 low = 0; 113 for (l=0; l<n; l++) { /* loop over added columns */ 114 col = in[l]; bcol = col/4; 115 ridx = row % 4; cidx = col % 4; 116 value = v[l + k*n]; 117 low = 0; high = nrow; 118 while (high-low > 7) { 119 t = (low+high)/2; 120 if (rp[t] > bcol) high = t; 121 else low = t; 122 } 123 for (i=low; i<high; i++) { 124 if (rp[i] > bcol) break; 125 if (rp[i] == bcol) { 126 bap = ap + 16*i + 4*cidx + ridx; 127 *bap += value; 128 goto noinsert1; 129 } 130 } 131 N = nrow++ - 1; 132 /* shift up all the later entries in this row */ 133 for (ii=N; ii>=i; ii--) { 134 rp[ii+1] = rp[ii]; 135 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 136 } 137 if (N>=i) { 138 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 139 } 140 rp[i] = bcol; 141 ap[16*i + 4*cidx + ridx] = value; 142 noinsert1:; 143 low = i; 144 } 145 ailen[brow] = nrow; 146 } 147 } 148 EXTERN_C_END 149 150 /* UGLY, ugly, ugly 151 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 152 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 153 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 154 converts the entries into single precision and then calls ..._MatScalar() to put them 155 into the single precision data structures. 156 */ 157 #if defined(PETSC_USE_MAT_SINGLE) 158 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 159 #else 160 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 161 #endif 162 163 #define CHUNKSIZE 10 164 165 /* 166 Checks for missing diagonals 167 */ 168 #undef __FUNCT__ 169 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 170 int MatMissingDiagonal_SeqBAIJ(Mat A) 171 { 172 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 173 int *diag,*jj = a->j,i,ierr; 174 175 PetscFunctionBegin; 176 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 177 diag = a->diag; 178 for (i=0; i<a->mbs; i++) { 179 if (jj[diag[i]] != i) { 180 SETERRQ1(1,"Matrix is missing diagonal number %d",i); 181 } 182 } 183 PetscFunctionReturn(0); 184 } 185 186 #undef __FUNCT__ 187 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 188 int MatMarkDiagonal_SeqBAIJ(Mat A) 189 { 190 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 191 int i,j,*diag,m = a->mbs,ierr; 192 193 PetscFunctionBegin; 194 if (a->diag) PetscFunctionReturn(0); 195 196 ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 197 PetscLogObjectMemory(A,(m+1)*sizeof(int)); 198 for (i=0; i<m; i++) { 199 diag[i] = a->i[i+1]; 200 for (j=a->i[i]; j<a->i[i+1]; j++) { 201 if (a->j[j] == i) { 202 diag[i] = j; 203 break; 204 } 205 } 206 } 207 a->diag = diag; 208 PetscFunctionReturn(0); 209 } 210 211 212 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 213 214 #undef __FUNCT__ 215 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 216 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 217 { 218 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 219 int ierr,n = a->mbs,i; 220 221 PetscFunctionBegin; 222 *nn = n; 223 if (!ia) PetscFunctionReturn(0); 224 if (symmetric) { 225 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 226 } else if (oshift == 1) { 227 /* temporarily add 1 to i and j indices */ 228 int nz = a->i[n]; 229 for (i=0; i<nz; i++) a->j[i]++; 230 for (i=0; i<n+1; i++) a->i[i]++; 231 *ia = a->i; *ja = a->j; 232 } else { 233 *ia = a->i; *ja = a->j; 234 } 235 236 PetscFunctionReturn(0); 237 } 238 239 #undef __FUNCT__ 240 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 241 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 242 { 243 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 244 int i,n = a->mbs,ierr; 245 246 PetscFunctionBegin; 247 if (!ia) PetscFunctionReturn(0); 248 if (symmetric) { 249 ierr = PetscFree(*ia);CHKERRQ(ierr); 250 ierr = PetscFree(*ja);CHKERRQ(ierr); 251 } else if (oshift == 1) { 252 int nz = a->i[n]-1; 253 for (i=0; i<nz; i++) a->j[i]--; 254 for (i=0; i<n+1; i++) a->i[i]--; 255 } 256 PetscFunctionReturn(0); 257 } 258 259 #undef __FUNCT__ 260 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 261 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 262 { 263 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 264 265 PetscFunctionBegin; 266 *bs = baij->bs; 267 PetscFunctionReturn(0); 268 } 269 270 271 #undef __FUNCT__ 272 #define __FUNCT__ "MatDestroy_SeqBAIJ" 273 int MatDestroy_SeqBAIJ(Mat A) 274 { 275 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 276 int ierr; 277 278 PetscFunctionBegin; 279 #if defined(PETSC_USE_LOG) 280 PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 281 #endif 282 ierr = PetscFree(a->a);CHKERRQ(ierr); 283 if (!a->singlemalloc) { 284 ierr = PetscFree(a->i);CHKERRQ(ierr); 285 ierr = PetscFree(a->j);CHKERRQ(ierr); 286 } 287 if (a->row) { 288 ierr = ISDestroy(a->row);CHKERRQ(ierr); 289 } 290 if (a->col) { 291 ierr = ISDestroy(a->col);CHKERRQ(ierr); 292 } 293 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 294 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 295 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 296 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 297 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 298 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 299 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 300 #if defined(PETSC_USE_MAT_SINGLE) 301 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 302 #endif 303 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 304 305 ierr = PetscFree(a);CHKERRQ(ierr); 306 PetscFunctionReturn(0); 307 } 308 309 #undef __FUNCT__ 310 #define __FUNCT__ "MatSetOption_SeqBAIJ" 311 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 312 { 313 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 314 315 PetscFunctionBegin; 316 switch (op) { 317 case MAT_ROW_ORIENTED: 318 a->roworiented = PETSC_TRUE; 319 break; 320 case MAT_COLUMN_ORIENTED: 321 a->roworiented = PETSC_FALSE; 322 break; 323 case MAT_COLUMNS_SORTED: 324 a->sorted = PETSC_TRUE; 325 break; 326 case MAT_COLUMNS_UNSORTED: 327 a->sorted = PETSC_FALSE; 328 break; 329 case MAT_KEEP_ZEROED_ROWS: 330 a->keepzeroedrows = PETSC_TRUE; 331 break; 332 case MAT_NO_NEW_NONZERO_LOCATIONS: 333 a->nonew = 1; 334 break; 335 case MAT_NEW_NONZERO_LOCATION_ERR: 336 a->nonew = -1; 337 break; 338 case MAT_NEW_NONZERO_ALLOCATION_ERR: 339 a->nonew = -2; 340 break; 341 case MAT_YES_NEW_NONZERO_LOCATIONS: 342 a->nonew = 0; 343 break; 344 case MAT_ROWS_SORTED: 345 case MAT_ROWS_UNSORTED: 346 case MAT_YES_NEW_DIAGONALS: 347 case MAT_IGNORE_OFF_PROC_ENTRIES: 348 case MAT_USE_HASH_TABLE: 349 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 350 break; 351 case MAT_NO_NEW_DIAGONALS: 352 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 353 case MAT_SYMMETRIC: 354 case MAT_STRUCTURALLY_SYMMETRIC: 355 case MAT_NOT_SYMMETRIC: 356 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 357 case MAT_HERMITIAN: 358 case MAT_NOT_HERMITIAN: 359 case MAT_SYMMETRY_ETERNAL: 360 case MAT_NOT_SYMMETRY_ETERNAL: 361 break; 362 default: 363 SETERRQ(PETSC_ERR_SUP,"unknown option"); 364 } 365 PetscFunctionReturn(0); 366 } 367 368 #undef __FUNCT__ 369 #define __FUNCT__ "MatGetRow_SeqBAIJ" 370 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 371 { 372 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 373 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 374 MatScalar *aa,*aa_i; 375 PetscScalar *v_i; 376 377 PetscFunctionBegin; 378 bs = a->bs; 379 ai = a->i; 380 aj = a->j; 381 aa = a->a; 382 bs2 = a->bs2; 383 384 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d out of range", row); 385 386 bn = row/bs; /* Block number */ 387 bp = row % bs; /* Block Position */ 388 M = ai[bn+1] - ai[bn]; 389 *nz = bs*M; 390 391 if (v) { 392 *v = 0; 393 if (*nz) { 394 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 395 for (i=0; i<M; i++) { /* for each block in the block row */ 396 v_i = *v + i*bs; 397 aa_i = aa + bs2*(ai[bn] + i); 398 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 399 } 400 } 401 } 402 403 if (idx) { 404 *idx = 0; 405 if (*nz) { 406 ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 407 for (i=0; i<M; i++) { /* for each block in the block row */ 408 idx_i = *idx + i*bs; 409 itmp = bs*aj[ai[bn] + i]; 410 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 411 } 412 } 413 } 414 PetscFunctionReturn(0); 415 } 416 417 #undef __FUNCT__ 418 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 419 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 420 { 421 int ierr; 422 423 PetscFunctionBegin; 424 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 425 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 426 PetscFunctionReturn(0); 427 } 428 429 #undef __FUNCT__ 430 #define __FUNCT__ "MatTranspose_SeqBAIJ" 431 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 432 { 433 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 434 Mat C; 435 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 436 int *rows,*cols,bs2=a->bs2; 437 PetscScalar *array; 438 439 PetscFunctionBegin; 440 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 441 ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 442 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 443 444 #if defined(PETSC_USE_MAT_SINGLE) 445 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 446 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 447 #else 448 array = a->a; 449 #endif 450 451 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 452 ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 453 ierr = PetscFree(col);CHKERRQ(ierr); 454 ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 455 cols = rows + bs; 456 for (i=0; i<mbs; i++) { 457 cols[0] = i*bs; 458 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 459 len = ai[i+1] - ai[i]; 460 for (j=0; j<len; j++) { 461 rows[0] = (*aj++)*bs; 462 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 463 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 464 array += bs2; 465 } 466 } 467 ierr = PetscFree(rows);CHKERRQ(ierr); 468 #if defined(PETSC_USE_MAT_SINGLE) 469 ierr = PetscFree(array); 470 #endif 471 472 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 473 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 474 475 if (B) { 476 *B = C; 477 } else { 478 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 479 } 480 PetscFunctionReturn(0); 481 } 482 483 #undef __FUNCT__ 484 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 485 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 486 { 487 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 488 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 489 PetscScalar *aa; 490 FILE *file; 491 492 PetscFunctionBegin; 493 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 494 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 495 col_lens[0] = MAT_FILE_COOKIE; 496 497 col_lens[1] = A->m; 498 col_lens[2] = A->n; 499 col_lens[3] = a->nz*bs2; 500 501 /* store lengths of each row and write (including header) to file */ 502 count = 0; 503 for (i=0; i<a->mbs; i++) { 504 for (j=0; j<bs; j++) { 505 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 506 } 507 } 508 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 509 ierr = PetscFree(col_lens);CHKERRQ(ierr); 510 511 /* store column indices (zero start index) */ 512 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 513 count = 0; 514 for (i=0; i<a->mbs; i++) { 515 for (j=0; j<bs; j++) { 516 for (k=a->i[i]; k<a->i[i+1]; k++) { 517 for (l=0; l<bs; l++) { 518 jj[count++] = bs*a->j[k] + l; 519 } 520 } 521 } 522 } 523 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 524 ierr = PetscFree(jj);CHKERRQ(ierr); 525 526 /* store nonzero values */ 527 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 528 count = 0; 529 for (i=0; i<a->mbs; i++) { 530 for (j=0; j<bs; j++) { 531 for (k=a->i[i]; k<a->i[i+1]; k++) { 532 for (l=0; l<bs; l++) { 533 aa[count++] = a->a[bs2*k + l*bs + j]; 534 } 535 } 536 } 537 } 538 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 539 ierr = PetscFree(aa);CHKERRQ(ierr); 540 541 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 542 if (file) { 543 fprintf(file,"-matload_block_size %d\n",a->bs); 544 } 545 PetscFunctionReturn(0); 546 } 547 548 #undef __FUNCT__ 549 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 550 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 551 { 552 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 553 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 554 PetscViewerFormat format; 555 556 PetscFunctionBegin; 557 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 558 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 559 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 560 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 561 Mat aij; 562 ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr); 563 ierr = MatView(aij,viewer);CHKERRQ(ierr); 564 ierr = MatDestroy(aij);CHKERRQ(ierr); 565 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 566 PetscFunctionReturn(0); 567 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 568 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 569 for (i=0; i<a->mbs; i++) { 570 for (j=0; j<bs; j++) { 571 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 572 for (k=a->i[i]; k<a->i[i+1]; k++) { 573 for (l=0; l<bs; l++) { 574 #if defined(PETSC_USE_COMPLEX) 575 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 576 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l, 577 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 578 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 579 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l, 580 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 581 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 582 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 583 } 584 #else 585 if (a->a[bs2*k + l*bs + j] != 0.0) { 586 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 587 } 588 #endif 589 } 590 } 591 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 592 } 593 } 594 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 595 } else { 596 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 597 for (i=0; i<a->mbs; i++) { 598 for (j=0; j<bs; j++) { 599 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 600 for (k=a->i[i]; k<a->i[i+1]; k++) { 601 for (l=0; l<bs; l++) { 602 #if defined(PETSC_USE_COMPLEX) 603 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 604 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 605 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 606 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 607 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 608 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 609 } else { 610 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 611 } 612 #else 613 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 614 #endif 615 } 616 } 617 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 618 } 619 } 620 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 621 } 622 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 623 PetscFunctionReturn(0); 624 } 625 626 #undef __FUNCT__ 627 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 628 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 629 { 630 Mat A = (Mat) Aa; 631 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 632 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 633 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 634 MatScalar *aa; 635 PetscViewer viewer; 636 637 PetscFunctionBegin; 638 639 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 640 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 641 642 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 643 644 /* loop over matrix elements drawing boxes */ 645 color = PETSC_DRAW_BLUE; 646 for (i=0,row=0; i<mbs; i++,row+=bs) { 647 for (j=a->i[i]; j<a->i[i+1]; j++) { 648 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 649 x_l = a->j[j]*bs; x_r = x_l + 1.0; 650 aa = a->a + j*bs2; 651 for (k=0; k<bs; k++) { 652 for (l=0; l<bs; l++) { 653 if (PetscRealPart(*aa++) >= 0.) continue; 654 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 655 } 656 } 657 } 658 } 659 color = PETSC_DRAW_CYAN; 660 for (i=0,row=0; i<mbs; i++,row+=bs) { 661 for (j=a->i[i]; j<a->i[i+1]; j++) { 662 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 663 x_l = a->j[j]*bs; x_r = x_l + 1.0; 664 aa = a->a + j*bs2; 665 for (k=0; k<bs; k++) { 666 for (l=0; l<bs; l++) { 667 if (PetscRealPart(*aa++) != 0.) continue; 668 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 669 } 670 } 671 } 672 } 673 674 color = PETSC_DRAW_RED; 675 for (i=0,row=0; i<mbs; i++,row+=bs) { 676 for (j=a->i[i]; j<a->i[i+1]; j++) { 677 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 678 x_l = a->j[j]*bs; x_r = x_l + 1.0; 679 aa = a->a + j*bs2; 680 for (k=0; k<bs; k++) { 681 for (l=0; l<bs; l++) { 682 if (PetscRealPart(*aa++) <= 0.) continue; 683 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 684 } 685 } 686 } 687 } 688 PetscFunctionReturn(0); 689 } 690 691 #undef __FUNCT__ 692 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 693 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 694 { 695 int ierr; 696 PetscReal xl,yl,xr,yr,w,h; 697 PetscDraw draw; 698 PetscTruth isnull; 699 700 PetscFunctionBegin; 701 702 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 703 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 704 705 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 706 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 707 xr += w; yr += h; xl = -w; yl = -h; 708 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 709 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 710 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 711 PetscFunctionReturn(0); 712 } 713 714 #undef __FUNCT__ 715 #define __FUNCT__ "MatView_SeqBAIJ" 716 int MatView_SeqBAIJ(Mat A,PetscViewer viewer) 717 { 718 int ierr; 719 PetscTruth issocket,isascii,isbinary,isdraw; 720 721 PetscFunctionBegin; 722 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 723 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 724 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 725 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 726 if (issocket) { 727 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 728 } else if (isascii){ 729 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 730 } else if (isbinary) { 731 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 732 } else if (isdraw) { 733 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 734 } else { 735 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 736 } 737 PetscFunctionReturn(0); 738 } 739 740 741 #undef __FUNCT__ 742 #define __FUNCT__ "MatGetValues_SeqBAIJ" 743 int MatGetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[]) 744 { 745 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 746 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 747 int *ai = a->i,*ailen = a->ilen; 748 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 749 MatScalar *ap,*aa = a->a,zero = 0.0; 750 751 PetscFunctionBegin; 752 for (k=0; k<m; k++) { /* loop over rows */ 753 row = im[k]; brow = row/bs; 754 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 755 if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d too large", row); 756 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 757 nrow = ailen[brow]; 758 for (l=0; l<n; l++) { /* loop over columns */ 759 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 760 if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %d too large", in[l]); 761 col = in[l] ; 762 bcol = col/bs; 763 cidx = col%bs; 764 ridx = row%bs; 765 high = nrow; 766 low = 0; /* assume unsorted */ 767 while (high-low > 5) { 768 t = (low+high)/2; 769 if (rp[t] > bcol) high = t; 770 else low = t; 771 } 772 for (i=low; i<high; i++) { 773 if (rp[i] > bcol) break; 774 if (rp[i] == bcol) { 775 *v++ = ap[bs2*i+bs*cidx+ridx]; 776 goto finished; 777 } 778 } 779 *v++ = zero; 780 finished:; 781 } 782 } 783 PetscFunctionReturn(0); 784 } 785 786 #if defined(PETSC_USE_MAT_SINGLE) 787 #undef __FUNCT__ 788 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 789 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 790 { 791 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 792 int ierr,i,N = m*n*b->bs2; 793 MatScalar *vsingle; 794 795 PetscFunctionBegin; 796 if (N > b->setvalueslen) { 797 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 798 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 799 b->setvalueslen = N; 800 } 801 vsingle = b->setvaluescopy; 802 for (i=0; i<N; i++) { 803 vsingle[i] = v[i]; 804 } 805 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 806 PetscFunctionReturn(0); 807 } 808 #endif 809 810 811 #undef __FUNCT__ 812 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 813 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode is) 814 { 815 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 816 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 817 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 818 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 819 PetscTruth roworiented=a->roworiented; 820 const MatScalar *value = v; 821 MatScalar *ap,*aa = a->a,*bap; 822 823 PetscFunctionBegin; 824 if (roworiented) { 825 stepval = (n-1)*bs; 826 } else { 827 stepval = (m-1)*bs; 828 } 829 for (k=0; k<m; k++) { /* loop over added rows */ 830 row = im[k]; 831 if (row < 0) continue; 832 #if defined(PETSC_USE_BOPT_g) 833 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1); 834 #endif 835 rp = aj + ai[row]; 836 ap = aa + bs2*ai[row]; 837 rmax = imax[row]; 838 nrow = ailen[row]; 839 low = 0; 840 for (l=0; l<n; l++) { /* loop over added columns */ 841 if (in[l] < 0) continue; 842 #if defined(PETSC_USE_BOPT_g) 843 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],a->nbs-1); 844 #endif 845 col = in[l]; 846 if (roworiented) { 847 value = v + k*(stepval+bs)*bs + l*bs; 848 } else { 849 value = v + l*(stepval+bs)*bs + k*bs; 850 } 851 if (!sorted) low = 0; high = nrow; 852 while (high-low > 7) { 853 t = (low+high)/2; 854 if (rp[t] > col) high = t; 855 else low = t; 856 } 857 for (i=low; i<high; i++) { 858 if (rp[i] > col) break; 859 if (rp[i] == col) { 860 bap = ap + bs2*i; 861 if (roworiented) { 862 if (is == ADD_VALUES) { 863 for (ii=0; ii<bs; ii++,value+=stepval) { 864 for (jj=ii; jj<bs2; jj+=bs) { 865 bap[jj] += *value++; 866 } 867 } 868 } else { 869 for (ii=0; ii<bs; ii++,value+=stepval) { 870 for (jj=ii; jj<bs2; jj+=bs) { 871 bap[jj] = *value++; 872 } 873 } 874 } 875 } else { 876 if (is == ADD_VALUES) { 877 for (ii=0; ii<bs; ii++,value+=stepval) { 878 for (jj=0; jj<bs; jj++) { 879 *bap++ += *value++; 880 } 881 } 882 } else { 883 for (ii=0; ii<bs; ii++,value+=stepval) { 884 for (jj=0; jj<bs; jj++) { 885 *bap++ = *value++; 886 } 887 } 888 } 889 } 890 goto noinsert2; 891 } 892 } 893 if (nonew == 1) goto noinsert2; 894 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 895 if (nrow >= rmax) { 896 /* there is no extra room in row, therefore enlarge */ 897 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 898 MatScalar *new_a; 899 900 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 901 902 /* malloc new storage space */ 903 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 904 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 905 new_j = (int*)(new_a + bs2*new_nz); 906 new_i = new_j + new_nz; 907 908 /* copy over old data into new slots */ 909 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 910 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 911 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 912 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 913 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 914 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 915 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 916 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 917 /* free up old matrix storage */ 918 ierr = PetscFree(a->a);CHKERRQ(ierr); 919 if (!a->singlemalloc) { 920 ierr = PetscFree(a->i);CHKERRQ(ierr); 921 ierr = PetscFree(a->j);CHKERRQ(ierr); 922 } 923 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 924 a->singlemalloc = PETSC_TRUE; 925 926 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 927 rmax = imax[row] = imax[row] + CHUNKSIZE; 928 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 929 a->maxnz += bs2*CHUNKSIZE; 930 a->reallocs++; 931 a->nz++; 932 } 933 N = nrow++ - 1; 934 /* shift up all the later entries in this row */ 935 for (ii=N; ii>=i; ii--) { 936 rp[ii+1] = rp[ii]; 937 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 938 } 939 if (N >= i) { 940 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 941 } 942 rp[i] = col; 943 bap = ap + bs2*i; 944 if (roworiented) { 945 for (ii=0; ii<bs; ii++,value+=stepval) { 946 for (jj=ii; jj<bs2; jj+=bs) { 947 bap[jj] = *value++; 948 } 949 } 950 } else { 951 for (ii=0; ii<bs; ii++,value+=stepval) { 952 for (jj=0; jj<bs; jj++) { 953 *bap++ = *value++; 954 } 955 } 956 } 957 noinsert2:; 958 low = i; 959 } 960 ailen[row] = nrow; 961 } 962 PetscFunctionReturn(0); 963 } 964 965 #undef __FUNCT__ 966 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 967 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 968 { 969 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 970 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 971 int m = A->m,*ip,N,*ailen = a->ilen; 972 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 973 MatScalar *aa = a->a,*ap; 974 975 PetscFunctionBegin; 976 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 977 978 if (m) rmax = ailen[0]; 979 for (i=1; i<mbs; i++) { 980 /* move each row back by the amount of empty slots (fshift) before it*/ 981 fshift += imax[i-1] - ailen[i-1]; 982 rmax = PetscMax(rmax,ailen[i]); 983 if (fshift) { 984 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 985 N = ailen[i]; 986 for (j=0; j<N; j++) { 987 ip[j-fshift] = ip[j]; 988 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 989 } 990 } 991 ai[i] = ai[i-1] + ailen[i-1]; 992 } 993 if (mbs) { 994 fshift += imax[mbs-1] - ailen[mbs-1]; 995 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 996 } 997 /* reset ilen and imax for each row */ 998 for (i=0; i<mbs; i++) { 999 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1000 } 1001 a->nz = ai[mbs]; 1002 1003 /* diagonals may have moved, so kill the diagonal pointers */ 1004 if (fshift && a->diag) { 1005 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1006 PetscLogObjectMemory(A,-(mbs+1)*sizeof(int)); 1007 a->diag = 0; 1008 } 1009 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2); 1010 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs); 1011 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 1012 a->reallocs = 0; 1013 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1014 1015 PetscFunctionReturn(0); 1016 } 1017 1018 1019 1020 /* 1021 This function returns an array of flags which indicate the locations of contiguous 1022 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1023 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1024 Assume: sizes should be long enough to hold all the values. 1025 */ 1026 #undef __FUNCT__ 1027 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1028 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 1029 { 1030 int i,j,k,row; 1031 PetscTruth flg; 1032 1033 PetscFunctionBegin; 1034 for (i=0,j=0; i<n; j++) { 1035 row = idx[i]; 1036 if (row%bs!=0) { /* Not the begining of a block */ 1037 sizes[j] = 1; 1038 i++; 1039 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1040 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1041 i++; 1042 } else { /* Begining of the block, so check if the complete block exists */ 1043 flg = PETSC_TRUE; 1044 for (k=1; k<bs; k++) { 1045 if (row+k != idx[i+k]) { /* break in the block */ 1046 flg = PETSC_FALSE; 1047 break; 1048 } 1049 } 1050 if (flg == PETSC_TRUE) { /* No break in the bs */ 1051 sizes[j] = bs; 1052 i+= bs; 1053 } else { 1054 sizes[j] = 1; 1055 i++; 1056 } 1057 } 1058 } 1059 *bs_max = j; 1060 PetscFunctionReturn(0); 1061 } 1062 1063 #undef __FUNCT__ 1064 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1065 int MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag) 1066 { 1067 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1068 int ierr,i,j,k,count,is_n,*is_idx,*rows; 1069 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 1070 PetscScalar zero = 0.0; 1071 MatScalar *aa; 1072 1073 PetscFunctionBegin; 1074 /* Make a copy of the IS and sort it */ 1075 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 1076 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1077 1078 /* allocate memory for rows,sizes */ 1079 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 1080 sizes = rows + is_n; 1081 1082 /* copy IS values to rows, and sort them */ 1083 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1084 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1085 if (baij->keepzeroedrows) { 1086 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1087 bs_max = is_n; 1088 } else { 1089 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1090 } 1091 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 1092 1093 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1094 row = rows[j]; 1095 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 1096 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1097 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1098 if (sizes[i] == bs && !baij->keepzeroedrows) { 1099 if (diag) { 1100 if (baij->ilen[row/bs] > 0) { 1101 baij->ilen[row/bs] = 1; 1102 baij->j[baij->i[row/bs]] = row/bs; 1103 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1104 } 1105 /* Now insert all the diagonal values for this bs */ 1106 for (k=0; k<bs; k++) { 1107 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 1108 } 1109 } else { /* (!diag) */ 1110 baij->ilen[row/bs] = 0; 1111 } /* end (!diag) */ 1112 } else { /* (sizes[i] != bs) */ 1113 #if defined (PETSC_USE_DEBUG) 1114 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 1115 #endif 1116 for (k=0; k<count; k++) { 1117 aa[0] = zero; 1118 aa += bs; 1119 } 1120 if (diag) { 1121 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1122 } 1123 } 1124 } 1125 1126 ierr = PetscFree(rows);CHKERRQ(ierr); 1127 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1128 PetscFunctionReturn(0); 1129 } 1130 1131 #undef __FUNCT__ 1132 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1133 int MatSetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 1134 { 1135 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1136 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1137 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1138 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1139 int ridx,cidx,bs2=a->bs2,ierr; 1140 PetscTruth roworiented=a->roworiented; 1141 MatScalar *ap,value,*aa=a->a,*bap; 1142 1143 PetscFunctionBegin; 1144 for (k=0; k<m; k++) { /* loop over added rows */ 1145 row = im[k]; brow = row/bs; 1146 if (row < 0) continue; 1147 #if defined(PETSC_USE_BOPT_g) 1148 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1); 1149 #endif 1150 rp = aj + ai[brow]; 1151 ap = aa + bs2*ai[brow]; 1152 rmax = imax[brow]; 1153 nrow = ailen[brow]; 1154 low = 0; 1155 for (l=0; l<n; l++) { /* loop over added columns */ 1156 if (in[l] < 0) continue; 1157 #if defined(PETSC_USE_BOPT_g) 1158 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1); 1159 #endif 1160 col = in[l]; bcol = col/bs; 1161 ridx = row % bs; cidx = col % bs; 1162 if (roworiented) { 1163 value = v[l + k*n]; 1164 } else { 1165 value = v[k + l*m]; 1166 } 1167 if (!sorted) low = 0; high = nrow; 1168 while (high-low > 7) { 1169 t = (low+high)/2; 1170 if (rp[t] > bcol) high = t; 1171 else low = t; 1172 } 1173 for (i=low; i<high; i++) { 1174 if (rp[i] > bcol) break; 1175 if (rp[i] == bcol) { 1176 bap = ap + bs2*i + bs*cidx + ridx; 1177 if (is == ADD_VALUES) *bap += value; 1178 else *bap = value; 1179 goto noinsert1; 1180 } 1181 } 1182 if (nonew == 1) goto noinsert1; 1183 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 1184 if (nrow >= rmax) { 1185 /* there is no extra room in row, therefore enlarge */ 1186 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1187 MatScalar *new_a; 1188 1189 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 1190 1191 /* Malloc new storage space */ 1192 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1193 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1194 new_j = (int*)(new_a + bs2*new_nz); 1195 new_i = new_j + new_nz; 1196 1197 /* copy over old data into new slots */ 1198 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1199 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1200 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1201 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1202 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1203 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1204 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1205 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1206 /* free up old matrix storage */ 1207 ierr = PetscFree(a->a);CHKERRQ(ierr); 1208 if (!a->singlemalloc) { 1209 ierr = PetscFree(a->i);CHKERRQ(ierr); 1210 ierr = PetscFree(a->j);CHKERRQ(ierr); 1211 } 1212 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1213 a->singlemalloc = PETSC_TRUE; 1214 1215 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1216 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1217 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1218 a->maxnz += bs2*CHUNKSIZE; 1219 a->reallocs++; 1220 a->nz++; 1221 } 1222 N = nrow++ - 1; 1223 /* shift up all the later entries in this row */ 1224 for (ii=N; ii>=i; ii--) { 1225 rp[ii+1] = rp[ii]; 1226 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1227 } 1228 if (N>=i) { 1229 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1230 } 1231 rp[i] = bcol; 1232 ap[bs2*i + bs*cidx + ridx] = value; 1233 noinsert1:; 1234 low = i; 1235 } 1236 ailen[brow] = nrow; 1237 } 1238 PetscFunctionReturn(0); 1239 } 1240 1241 1242 #undef __FUNCT__ 1243 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1244 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1245 { 1246 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1247 Mat outA; 1248 int ierr; 1249 PetscTruth row_identity,col_identity; 1250 1251 PetscFunctionBegin; 1252 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1253 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1254 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1255 if (!row_identity || !col_identity) { 1256 SETERRQ(1,"Row and column permutations must be identity for in-place ILU"); 1257 } 1258 1259 outA = inA; 1260 inA->factor = FACTOR_LU; 1261 1262 if (!a->diag) { 1263 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1264 } 1265 1266 a->row = row; 1267 a->col = col; 1268 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1269 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1270 1271 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1272 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1273 PetscLogObjectParent(inA,a->icol); 1274 1275 /* 1276 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1277 for ILU(0) factorization with natural ordering 1278 */ 1279 if (a->bs < 8) { 1280 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1281 } else { 1282 if (!a->solve_work) { 1283 ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1284 PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar)); 1285 } 1286 } 1287 1288 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1289 1290 PetscFunctionReturn(0); 1291 } 1292 #undef __FUNCT__ 1293 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1294 int MatPrintHelp_SeqBAIJ(Mat A) 1295 { 1296 static PetscTruth called = PETSC_FALSE; 1297 MPI_Comm comm = A->comm; 1298 int ierr; 1299 1300 PetscFunctionBegin; 1301 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1302 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1303 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1304 PetscFunctionReturn(0); 1305 } 1306 1307 EXTERN_C_BEGIN 1308 #undef __FUNCT__ 1309 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1310 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1311 { 1312 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1313 int i,nz,nbs; 1314 1315 PetscFunctionBegin; 1316 nz = baij->maxnz/baij->bs2; 1317 nbs = baij->nbs; 1318 for (i=0; i<nz; i++) { 1319 baij->j[i] = indices[i]; 1320 } 1321 baij->nz = nz; 1322 for (i=0; i<nbs; i++) { 1323 baij->ilen[i] = baij->imax[i]; 1324 } 1325 1326 PetscFunctionReturn(0); 1327 } 1328 EXTERN_C_END 1329 1330 #undef __FUNCT__ 1331 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1332 /*@ 1333 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1334 in the matrix. 1335 1336 Input Parameters: 1337 + mat - the SeqBAIJ matrix 1338 - indices - the column indices 1339 1340 Level: advanced 1341 1342 Notes: 1343 This can be called if you have precomputed the nonzero structure of the 1344 matrix and want to provide it to the matrix object to improve the performance 1345 of the MatSetValues() operation. 1346 1347 You MUST have set the correct numbers of nonzeros per row in the call to 1348 MatCreateSeqBAIJ(). 1349 1350 MUST be called before any calls to MatSetValues(); 1351 1352 @*/ 1353 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1354 { 1355 int ierr,(*f)(Mat,int *); 1356 1357 PetscFunctionBegin; 1358 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1359 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1360 if (f) { 1361 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1362 } else { 1363 SETERRQ(1,"Wrong type of matrix to set column indices"); 1364 } 1365 PetscFunctionReturn(0); 1366 } 1367 1368 #undef __FUNCT__ 1369 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1370 int MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1371 { 1372 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1373 int ierr,i,j,n,row,bs,*ai,*aj,mbs; 1374 PetscReal atmp; 1375 PetscScalar *x,zero = 0.0; 1376 MatScalar *aa; 1377 int ncols,brow,krow,kcol; 1378 1379 PetscFunctionBegin; 1380 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1381 bs = a->bs; 1382 aa = a->a; 1383 ai = a->i; 1384 aj = a->j; 1385 mbs = a->mbs; 1386 1387 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1388 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1389 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1390 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1391 for (i=0; i<mbs; i++) { 1392 ncols = ai[1] - ai[0]; ai++; 1393 brow = bs*i; 1394 for (j=0; j<ncols; j++){ 1395 /* bcol = bs*(*aj); */ 1396 for (kcol=0; kcol<bs; kcol++){ 1397 for (krow=0; krow<bs; krow++){ 1398 atmp = PetscAbsScalar(*aa); aa++; 1399 row = brow + krow; /* row index */ 1400 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1401 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1402 } 1403 } 1404 aj++; 1405 } 1406 } 1407 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1408 PetscFunctionReturn(0); 1409 } 1410 1411 #undef __FUNCT__ 1412 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1413 int MatSetUpPreallocation_SeqBAIJ(Mat A) 1414 { 1415 int ierr; 1416 1417 PetscFunctionBegin; 1418 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1419 PetscFunctionReturn(0); 1420 } 1421 1422 #undef __FUNCT__ 1423 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1424 int MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1425 { 1426 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1427 PetscFunctionBegin; 1428 *array = a->a; 1429 PetscFunctionReturn(0); 1430 } 1431 1432 #undef __FUNCT__ 1433 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1434 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1435 { 1436 PetscFunctionBegin; 1437 PetscFunctionReturn(0); 1438 } 1439 1440 #include "petscblaslapack.h" 1441 #undef __FUNCT__ 1442 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1443 int MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1444 { 1445 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1446 int ierr,one=1,i,bs=y->bs,j,bs2; 1447 1448 PetscFunctionBegin; 1449 if (str == SAME_NONZERO_PATTERN) { 1450 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1451 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1452 if (y->xtoy && y->XtoY != X) { 1453 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1454 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1455 } 1456 if (!y->xtoy) { /* get xtoy */ 1457 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1458 y->XtoY = X; 1459 } 1460 bs2 = bs*bs; 1461 for (i=0; i<x->nz; i++) { 1462 j = 0; 1463 while (j < bs2){ 1464 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1465 j++; 1466 } 1467 } 1468 PetscLogInfo(0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)); 1469 } else { 1470 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1471 } 1472 PetscFunctionReturn(0); 1473 } 1474 1475 /* -------------------------------------------------------------------*/ 1476 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1477 MatGetRow_SeqBAIJ, 1478 MatRestoreRow_SeqBAIJ, 1479 MatMult_SeqBAIJ_N, 1480 /* 4*/ MatMultAdd_SeqBAIJ_N, 1481 MatMultTranspose_SeqBAIJ, 1482 MatMultTransposeAdd_SeqBAIJ, 1483 MatSolve_SeqBAIJ_N, 1484 0, 1485 0, 1486 /*10*/ 0, 1487 MatLUFactor_SeqBAIJ, 1488 0, 1489 0, 1490 MatTranspose_SeqBAIJ, 1491 /*15*/ MatGetInfo_SeqBAIJ, 1492 MatEqual_SeqBAIJ, 1493 MatGetDiagonal_SeqBAIJ, 1494 MatDiagonalScale_SeqBAIJ, 1495 MatNorm_SeqBAIJ, 1496 /*20*/ 0, 1497 MatAssemblyEnd_SeqBAIJ, 1498 0, 1499 MatSetOption_SeqBAIJ, 1500 MatZeroEntries_SeqBAIJ, 1501 /*25*/ MatZeroRows_SeqBAIJ, 1502 MatLUFactorSymbolic_SeqBAIJ, 1503 MatLUFactorNumeric_SeqBAIJ_N, 1504 0, 1505 0, 1506 /*30*/ MatSetUpPreallocation_SeqBAIJ, 1507 MatILUFactorSymbolic_SeqBAIJ, 1508 0, 1509 MatGetArray_SeqBAIJ, 1510 MatRestoreArray_SeqBAIJ, 1511 /*35*/ MatDuplicate_SeqBAIJ, 1512 0, 1513 0, 1514 MatILUFactor_SeqBAIJ, 1515 0, 1516 /*40*/ MatAXPY_SeqBAIJ, 1517 MatGetSubMatrices_SeqBAIJ, 1518 MatIncreaseOverlap_SeqBAIJ, 1519 MatGetValues_SeqBAIJ, 1520 0, 1521 /*45*/ MatPrintHelp_SeqBAIJ, 1522 MatScale_SeqBAIJ, 1523 0, 1524 0, 1525 0, 1526 /*50*/ MatGetBlockSize_SeqBAIJ, 1527 MatGetRowIJ_SeqBAIJ, 1528 MatRestoreRowIJ_SeqBAIJ, 1529 0, 1530 0, 1531 /*55*/ 0, 1532 0, 1533 0, 1534 0, 1535 MatSetValuesBlocked_SeqBAIJ, 1536 /*60*/ MatGetSubMatrix_SeqBAIJ, 1537 MatDestroy_SeqBAIJ, 1538 MatView_SeqBAIJ, 1539 MatGetPetscMaps_Petsc, 1540 0, 1541 /*65*/ 0, 1542 0, 1543 0, 1544 0, 1545 0, 1546 /*70*/ MatGetRowMax_SeqBAIJ, 1547 MatConvert_Basic, 1548 0, 1549 0, 1550 0, 1551 /*75*/ 0, 1552 0, 1553 0, 1554 0, 1555 0, 1556 /*80*/ 0, 1557 0, 1558 0, 1559 0, 1560 /*85*/ MatLoad_SeqBAIJ 1561 }; 1562 1563 EXTERN_C_BEGIN 1564 #undef __FUNCT__ 1565 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 1566 int MatStoreValues_SeqBAIJ(Mat mat) 1567 { 1568 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1569 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1570 int ierr; 1571 1572 PetscFunctionBegin; 1573 if (aij->nonew != 1) { 1574 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1575 } 1576 1577 /* allocate space for values if not already there */ 1578 if (!aij->saved_values) { 1579 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1580 } 1581 1582 /* copy values over */ 1583 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1584 PetscFunctionReturn(0); 1585 } 1586 EXTERN_C_END 1587 1588 EXTERN_C_BEGIN 1589 #undef __FUNCT__ 1590 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 1591 int MatRetrieveValues_SeqBAIJ(Mat mat) 1592 { 1593 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1594 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1595 1596 PetscFunctionBegin; 1597 if (aij->nonew != 1) { 1598 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1599 } 1600 if (!aij->saved_values) { 1601 SETERRQ(1,"Must call MatStoreValues(A);first"); 1602 } 1603 1604 /* copy values over */ 1605 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1606 PetscFunctionReturn(0); 1607 } 1608 EXTERN_C_END 1609 1610 EXTERN_C_BEGIN 1611 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,Mat*); 1612 EXTERN_C_END 1613 1614 EXTERN_C_BEGIN 1615 #undef __FUNCT__ 1616 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 1617 int MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,int bs,int nz,int *nnz) 1618 { 1619 Mat_SeqBAIJ *b; 1620 int i,len,ierr,mbs,nbs,bs2,newbs = bs; 1621 PetscTruth flg; 1622 1623 PetscFunctionBegin; 1624 1625 B->preallocated = PETSC_TRUE; 1626 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 1627 if (nnz && newbs != bs) { 1628 SETERRQ(1,"Cannot change blocksize from command line if setting nnz"); 1629 } 1630 bs = newbs; 1631 1632 mbs = B->m/bs; 1633 nbs = B->n/bs; 1634 bs2 = bs*bs; 1635 1636 if (mbs*bs!=B->m || nbs*bs!=B->n) { 1637 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 1638 } 1639 1640 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1641 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1642 if (nnz) { 1643 for (i=0; i<mbs; i++) { 1644 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1645 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 1646 } 1647 } 1648 1649 b = (Mat_SeqBAIJ*)B->data; 1650 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1651 B->ops->solve = MatSolve_SeqBAIJ_Update; 1652 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 1653 if (!flg) { 1654 switch (bs) { 1655 case 1: 1656 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1657 B->ops->mult = MatMult_SeqBAIJ_1; 1658 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 1659 break; 1660 case 2: 1661 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1662 B->ops->mult = MatMult_SeqBAIJ_2; 1663 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 1664 break; 1665 case 3: 1666 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1667 B->ops->mult = MatMult_SeqBAIJ_3; 1668 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 1669 break; 1670 case 4: 1671 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1672 B->ops->mult = MatMult_SeqBAIJ_4; 1673 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 1674 break; 1675 case 5: 1676 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1677 B->ops->mult = MatMult_SeqBAIJ_5; 1678 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 1679 break; 1680 case 6: 1681 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 1682 B->ops->mult = MatMult_SeqBAIJ_6; 1683 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 1684 break; 1685 case 7: 1686 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 1687 B->ops->mult = MatMult_SeqBAIJ_7; 1688 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 1689 break; 1690 default: 1691 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 1692 B->ops->mult = MatMult_SeqBAIJ_N; 1693 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 1694 break; 1695 } 1696 } 1697 b->bs = bs; 1698 b->mbs = mbs; 1699 b->nbs = nbs; 1700 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1701 if (!nnz) { 1702 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1703 else if (nz <= 0) nz = 1; 1704 for (i=0; i<mbs; i++) b->imax[i] = nz; 1705 nz = nz*mbs; 1706 } else { 1707 nz = 0; 1708 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1709 } 1710 1711 /* allocate the matrix space */ 1712 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1713 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1714 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1715 b->j = (int*)(b->a + nz*bs2); 1716 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 1717 b->i = b->j + nz; 1718 b->singlemalloc = PETSC_TRUE; 1719 1720 b->i[0] = 0; 1721 for (i=1; i<mbs+1; i++) { 1722 b->i[i] = b->i[i-1] + b->imax[i-1]; 1723 } 1724 1725 /* b->ilen will count nonzeros in each block row so far. */ 1726 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1727 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1728 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1729 1730 b->bs = bs; 1731 b->bs2 = bs2; 1732 b->mbs = mbs; 1733 b->nz = 0; 1734 b->maxnz = nz*bs2; 1735 B->info.nz_unneeded = (PetscReal)b->maxnz; 1736 PetscFunctionReturn(0); 1737 } 1738 EXTERN_C_END 1739 1740 /*MC 1741 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 1742 block sparse compressed row format. 1743 1744 Options Database Keys: 1745 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 1746 1747 Level: beginner 1748 1749 .seealso: MatCreateSeqBAIJ 1750 M*/ 1751 1752 EXTERN_C_BEGIN 1753 #undef __FUNCT__ 1754 #define __FUNCT__ "MatCreate_SeqBAIJ" 1755 int MatCreate_SeqBAIJ(Mat B) 1756 { 1757 int ierr,size; 1758 Mat_SeqBAIJ *b; 1759 1760 PetscFunctionBegin; 1761 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1762 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1763 1764 B->m = B->M = PetscMax(B->m,B->M); 1765 B->n = B->N = PetscMax(B->n,B->N); 1766 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 1767 B->data = (void*)b; 1768 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1769 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1770 B->factor = 0; 1771 B->lupivotthreshold = 1.0; 1772 B->mapping = 0; 1773 b->row = 0; 1774 b->col = 0; 1775 b->icol = 0; 1776 b->reallocs = 0; 1777 b->saved_values = 0; 1778 #if defined(PETSC_USE_MAT_SINGLE) 1779 b->setvalueslen = 0; 1780 b->setvaluescopy = PETSC_NULL; 1781 #endif 1782 1783 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 1784 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 1785 1786 b->sorted = PETSC_FALSE; 1787 b->roworiented = PETSC_TRUE; 1788 b->nonew = 0; 1789 b->diag = 0; 1790 b->solve_work = 0; 1791 b->mult_work = 0; 1792 B->spptr = 0; 1793 B->info.nz_unneeded = (PetscReal)b->maxnz; 1794 b->keepzeroedrows = PETSC_FALSE; 1795 b->xtoy = 0; 1796 b->XtoY = 0; 1797 1798 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1799 "MatStoreValues_SeqBAIJ", 1800 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1801 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1802 "MatRetrieveValues_SeqBAIJ", 1803 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1804 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1805 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1806 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1807 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 1808 "MatConvert_SeqBAIJ_SeqAIJ", 1809 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 1810 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 1811 "MatSeqBAIJSetPreallocation_SeqBAIJ", 1812 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 1813 PetscFunctionReturn(0); 1814 } 1815 EXTERN_C_END 1816 1817 #undef __FUNCT__ 1818 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 1819 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1820 { 1821 Mat C; 1822 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 1823 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1824 1825 PetscFunctionBegin; 1826 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1827 1828 *B = 0; 1829 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1830 ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr); 1831 c = (Mat_SeqBAIJ*)C->data; 1832 1833 c->bs = a->bs; 1834 c->bs2 = a->bs2; 1835 c->mbs = a->mbs; 1836 c->nbs = a->nbs; 1837 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1838 1839 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1840 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1841 for (i=0; i<mbs; i++) { 1842 c->imax[i] = a->imax[i]; 1843 c->ilen[i] = a->ilen[i]; 1844 } 1845 1846 /* allocate the matrix space */ 1847 c->singlemalloc = PETSC_TRUE; 1848 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1849 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1850 c->j = (int*)(c->a + nz*bs2); 1851 c->i = c->j + nz; 1852 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1853 if (mbs > 0) { 1854 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1855 if (cpvalues == MAT_COPY_VALUES) { 1856 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1857 } else { 1858 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1859 } 1860 } 1861 1862 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1863 c->sorted = a->sorted; 1864 c->roworiented = a->roworiented; 1865 c->nonew = a->nonew; 1866 1867 if (a->diag) { 1868 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1869 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1870 for (i=0; i<mbs; i++) { 1871 c->diag[i] = a->diag[i]; 1872 } 1873 } else c->diag = 0; 1874 c->nz = a->nz; 1875 c->maxnz = a->maxnz; 1876 c->solve_work = 0; 1877 C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1878 c->mult_work = 0; 1879 C->preallocated = PETSC_TRUE; 1880 C->assembled = PETSC_TRUE; 1881 *B = C; 1882 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1883 PetscFunctionReturn(0); 1884 } 1885 1886 #undef __FUNCT__ 1887 #define __FUNCT__ "MatLoad_SeqBAIJ" 1888 int MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A) 1889 { 1890 Mat_SeqBAIJ *a; 1891 Mat B; 1892 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1893 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1894 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1895 int *masked,nmask,tmp,bs2,ishift; 1896 PetscScalar *aa; 1897 MPI_Comm comm = ((PetscObject)viewer)->comm; 1898 1899 PetscFunctionBegin; 1900 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1901 bs2 = bs*bs; 1902 1903 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1904 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1905 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1906 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1907 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1908 M = header[1]; N = header[2]; nz = header[3]; 1909 1910 if (header[3] < 0) { 1911 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 1912 } 1913 1914 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1915 1916 /* 1917 This code adds extra rows to make sure the number of rows is 1918 divisible by the blocksize 1919 */ 1920 mbs = M/bs; 1921 extra_rows = bs - M + bs*(mbs); 1922 if (extra_rows == bs) extra_rows = 0; 1923 else mbs++; 1924 if (extra_rows) { 1925 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1926 } 1927 1928 /* read in row lengths */ 1929 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1930 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1931 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1932 1933 /* read in column indices */ 1934 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1935 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1936 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1937 1938 /* loop over row lengths determining block row lengths */ 1939 ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 1940 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1941 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1942 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1943 masked = mask + mbs; 1944 rowcount = 0; nzcount = 0; 1945 for (i=0; i<mbs; i++) { 1946 nmask = 0; 1947 for (j=0; j<bs; j++) { 1948 kmax = rowlengths[rowcount]; 1949 for (k=0; k<kmax; k++) { 1950 tmp = jj[nzcount++]/bs; 1951 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1952 } 1953 rowcount++; 1954 } 1955 browlengths[i] += nmask; 1956 /* zero out the mask elements we set */ 1957 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1958 } 1959 1960 /* create our matrix */ 1961 ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B); 1962 ierr = MatSetType(B,type);CHKERRQ(ierr); 1963 ierr = MatSeqBAIJSetPreallocation(B,bs,0,browlengths);CHKERRQ(ierr); 1964 a = (Mat_SeqBAIJ*)B->data; 1965 1966 /* set matrix "i" values */ 1967 a->i[0] = 0; 1968 for (i=1; i<= mbs; i++) { 1969 a->i[i] = a->i[i-1] + browlengths[i-1]; 1970 a->ilen[i-1] = browlengths[i-1]; 1971 } 1972 a->nz = 0; 1973 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 1974 1975 /* read in nonzero values */ 1976 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1977 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1978 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1979 1980 /* set "a" and "j" values into matrix */ 1981 nzcount = 0; jcount = 0; 1982 for (i=0; i<mbs; i++) { 1983 nzcountb = nzcount; 1984 nmask = 0; 1985 for (j=0; j<bs; j++) { 1986 kmax = rowlengths[i*bs+j]; 1987 for (k=0; k<kmax; k++) { 1988 tmp = jj[nzcount++]/bs; 1989 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1990 } 1991 } 1992 /* sort the masked values */ 1993 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1994 1995 /* set "j" values into matrix */ 1996 maskcount = 1; 1997 for (j=0; j<nmask; j++) { 1998 a->j[jcount++] = masked[j]; 1999 mask[masked[j]] = maskcount++; 2000 } 2001 /* set "a" values into matrix */ 2002 ishift = bs2*a->i[i]; 2003 for (j=0; j<bs; j++) { 2004 kmax = rowlengths[i*bs+j]; 2005 for (k=0; k<kmax; k++) { 2006 tmp = jj[nzcountb]/bs ; 2007 block = mask[tmp] - 1; 2008 point = jj[nzcountb] - bs*tmp; 2009 idx = ishift + bs2*block + j + bs*point; 2010 a->a[idx] = (MatScalar)aa[nzcountb++]; 2011 } 2012 } 2013 /* zero out the mask elements we set */ 2014 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2015 } 2016 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2017 2018 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2019 ierr = PetscFree(browlengths);CHKERRQ(ierr); 2020 ierr = PetscFree(aa);CHKERRQ(ierr); 2021 ierr = PetscFree(jj);CHKERRQ(ierr); 2022 ierr = PetscFree(mask);CHKERRQ(ierr); 2023 2024 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2025 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2026 ierr = MatView_Private(B);CHKERRQ(ierr); 2027 2028 *A = B; 2029 PetscFunctionReturn(0); 2030 } 2031 2032 #undef __FUNCT__ 2033 #define __FUNCT__ "MatCreateSeqBAIJ" 2034 /*@C 2035 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2036 compressed row) format. For good matrix assembly performance the 2037 user should preallocate the matrix storage by setting the parameter nz 2038 (or the array nnz). By setting these parameters accurately, performance 2039 during matrix assembly can be increased by more than a factor of 50. 2040 2041 Collective on MPI_Comm 2042 2043 Input Parameters: 2044 + comm - MPI communicator, set to PETSC_COMM_SELF 2045 . bs - size of block 2046 . m - number of rows 2047 . n - number of columns 2048 . nz - number of nonzero blocks per block row (same for all rows) 2049 - nnz - array containing the number of nonzero blocks in the various block rows 2050 (possibly different for each block row) or PETSC_NULL 2051 2052 Output Parameter: 2053 . A - the matrix 2054 2055 Options Database Keys: 2056 . -mat_no_unroll - uses code that does not unroll the loops in the 2057 block calculations (much slower) 2058 . -mat_block_size - size of the blocks to use 2059 2060 Level: intermediate 2061 2062 Notes: 2063 A nonzero block is any block that as 1 or more nonzeros in it 2064 2065 The block AIJ format is fully compatible with standard Fortran 77 2066 storage. That is, the stored row and column indices can begin at 2067 either one (as in Fortran) or zero. See the users' manual for details. 2068 2069 Specify the preallocated storage with either nz or nnz (not both). 2070 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2071 allocation. For additional details, see the users manual chapter on 2072 matrices. 2073 2074 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2075 @*/ 2076 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A) 2077 { 2078 int ierr; 2079 2080 PetscFunctionBegin; 2081 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 2082 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2083 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 2084 PetscFunctionReturn(0); 2085 } 2086 2087 #undef __FUNCT__ 2088 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2089 /*@C 2090 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2091 per row in the matrix. For good matrix assembly performance the 2092 user should preallocate the matrix storage by setting the parameter nz 2093 (or the array nnz). By setting these parameters accurately, performance 2094 during matrix assembly can be increased by more than a factor of 50. 2095 2096 Collective on MPI_Comm 2097 2098 Input Parameters: 2099 + A - the matrix 2100 . bs - size of block 2101 . nz - number of block nonzeros per block row (same for all rows) 2102 - nnz - array containing the number of block nonzeros in the various block rows 2103 (possibly different for each block row) or PETSC_NULL 2104 2105 Options Database Keys: 2106 . -mat_no_unroll - uses code that does not unroll the loops in the 2107 block calculations (much slower) 2108 . -mat_block_size - size of the blocks to use 2109 2110 Level: intermediate 2111 2112 Notes: 2113 The block AIJ format is fully compatible with standard Fortran 77 2114 storage. That is, the stored row and column indices can begin at 2115 either one (as in Fortran) or zero. See the users' manual for details. 2116 2117 Specify the preallocated storage with either nz or nnz (not both). 2118 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2119 allocation. For additional details, see the users manual chapter on 2120 matrices. 2121 2122 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2123 @*/ 2124 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) 2125 { 2126 int ierr,(*f)(Mat,int,int,const int[]); 2127 2128 PetscFunctionBegin; 2129 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2130 if (f) { 2131 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2132 } 2133 PetscFunctionReturn(0); 2134 } 2135