1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 #include "petscsys.h" /*I "petscmat.h" I*/ 11 12 /* 13 Special version for Fun3d sequential benchmark 14 */ 15 #if defined(PETSC_HAVE_FORTRAN_CAPS) 16 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 17 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 18 #define matsetvaluesblocked4_ matsetvaluesblocked4 19 #endif 20 21 EXTERN_C_BEGIN 22 #undef __FUNCT__ 23 #define __FUNCT__ "matsetvaluesblocked4_" 24 void matsetvaluesblocked4_(Mat *AA,int *mm,const int im[],int *nn,const int in[],const PetscScalar v[]) 25 { 26 Mat A = *AA; 27 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 28 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 29 int *ai=a->i,*ailen=a->ilen; 30 int *aj=a->j,stepval; 31 const PetscScalar *value = v; 32 MatScalar *ap,*aa = a->a,*bap; 33 34 PetscFunctionBegin; 35 stepval = (n-1)*4; 36 for (k=0; k<m; k++) { /* loop over added rows */ 37 row = im[k]; 38 rp = aj + ai[row]; 39 ap = aa + 16*ai[row]; 40 nrow = ailen[row]; 41 low = 0; 42 for (l=0; l<n; l++) { /* loop over added columns */ 43 col = in[l]; 44 value = v + k*(stepval+4)*4 + l*4; 45 low = 0; high = nrow; 46 while (high-low > 7) { 47 t = (low+high)/2; 48 if (rp[t] > col) high = t; 49 else low = t; 50 } 51 for (i=low; i<high; i++) { 52 if (rp[i] > col) break; 53 if (rp[i] == col) { 54 bap = ap + 16*i; 55 for (ii=0; ii<4; ii++,value+=stepval) { 56 for (jj=ii; jj<16; jj+=4) { 57 bap[jj] += *value++; 58 } 59 } 60 goto noinsert2; 61 } 62 } 63 N = nrow++ - 1; 64 /* shift up all the later entries in this row */ 65 for (ii=N; ii>=i; ii--) { 66 rp[ii+1] = rp[ii]; 67 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 68 } 69 if (N >= i) { 70 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 71 } 72 rp[i] = col; 73 bap = ap + 16*i; 74 for (ii=0; ii<4; ii++,value+=stepval) { 75 for (jj=ii; jj<16; jj+=4) { 76 bap[jj] = *value++; 77 } 78 } 79 noinsert2:; 80 low = i; 81 } 82 ailen[row] = nrow; 83 } 84 } 85 EXTERN_C_END 86 87 #if defined(PETSC_HAVE_FORTRAN_CAPS) 88 #define matsetvalues4_ MATSETVALUES4 89 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 90 #define matsetvalues4_ matsetvalues4 91 #endif 92 93 EXTERN_C_BEGIN 94 #undef __FUNCT__ 95 #define __FUNCT__ "MatSetValues4_" 96 void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v) 97 { 98 Mat A = *AA; 99 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 100 int *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 101 int *ai=a->i,*ailen=a->ilen; 102 int *aj=a->j,brow,bcol; 103 int ridx,cidx; 104 MatScalar *ap,value,*aa=a->a,*bap; 105 106 PetscFunctionBegin; 107 for (k=0; k<m; k++) { /* loop over added rows */ 108 row = im[k]; brow = row/4; 109 rp = aj + ai[brow]; 110 ap = aa + 16*ai[brow]; 111 nrow = ailen[brow]; 112 low = 0; 113 for (l=0; l<n; l++) { /* loop over added columns */ 114 col = in[l]; bcol = col/4; 115 ridx = row % 4; cidx = col % 4; 116 value = v[l + k*n]; 117 low = 0; high = nrow; 118 while (high-low > 7) { 119 t = (low+high)/2; 120 if (rp[t] > bcol) high = t; 121 else low = t; 122 } 123 for (i=low; i<high; i++) { 124 if (rp[i] > bcol) break; 125 if (rp[i] == bcol) { 126 bap = ap + 16*i + 4*cidx + ridx; 127 *bap += value; 128 goto noinsert1; 129 } 130 } 131 N = nrow++ - 1; 132 /* shift up all the later entries in this row */ 133 for (ii=N; ii>=i; ii--) { 134 rp[ii+1] = rp[ii]; 135 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 136 } 137 if (N>=i) { 138 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 139 } 140 rp[i] = bcol; 141 ap[16*i + 4*cidx + ridx] = value; 142 noinsert1:; 143 low = i; 144 } 145 ailen[brow] = nrow; 146 } 147 } 148 EXTERN_C_END 149 150 /* UGLY, ugly, ugly 151 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 152 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 153 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 154 converts the entries into single precision and then calls ..._MatScalar() to put them 155 into the single precision data structures. 156 */ 157 #if defined(PETSC_USE_MAT_SINGLE) 158 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 159 #else 160 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 161 #endif 162 163 #define CHUNKSIZE 10 164 165 /* 166 Checks for missing diagonals 167 */ 168 #undef __FUNCT__ 169 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 170 int MatMissingDiagonal_SeqBAIJ(Mat A) 171 { 172 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 173 int *diag,*jj = a->j,i,ierr; 174 175 PetscFunctionBegin; 176 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 177 diag = a->diag; 178 for (i=0; i<a->mbs; i++) { 179 if (jj[diag[i]] != i) { 180 SETERRQ1(1,"Matrix is missing diagonal number %d",i); 181 } 182 } 183 PetscFunctionReturn(0); 184 } 185 186 #undef __FUNCT__ 187 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 188 int MatMarkDiagonal_SeqBAIJ(Mat A) 189 { 190 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 191 int i,j,*diag,m = a->mbs,ierr; 192 193 PetscFunctionBegin; 194 if (a->diag) PetscFunctionReturn(0); 195 196 ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 197 PetscLogObjectMemory(A,(m+1)*sizeof(int)); 198 for (i=0; i<m; i++) { 199 diag[i] = a->i[i+1]; 200 for (j=a->i[i]; j<a->i[i+1]; j++) { 201 if (a->j[j] == i) { 202 diag[i] = j; 203 break; 204 } 205 } 206 } 207 a->diag = diag; 208 PetscFunctionReturn(0); 209 } 210 211 212 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 213 214 #undef __FUNCT__ 215 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 216 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 217 { 218 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 219 int ierr,n = a->mbs,i; 220 221 PetscFunctionBegin; 222 *nn = n; 223 if (!ia) PetscFunctionReturn(0); 224 if (symmetric) { 225 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 226 } else if (oshift == 1) { 227 /* temporarily add 1 to i and j indices */ 228 int nz = a->i[n]; 229 for (i=0; i<nz; i++) a->j[i]++; 230 for (i=0; i<n+1; i++) a->i[i]++; 231 *ia = a->i; *ja = a->j; 232 } else { 233 *ia = a->i; *ja = a->j; 234 } 235 236 PetscFunctionReturn(0); 237 } 238 239 #undef __FUNCT__ 240 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 241 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 242 { 243 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 244 int i,n = a->mbs,ierr; 245 246 PetscFunctionBegin; 247 if (!ia) PetscFunctionReturn(0); 248 if (symmetric) { 249 ierr = PetscFree(*ia);CHKERRQ(ierr); 250 ierr = PetscFree(*ja);CHKERRQ(ierr); 251 } else if (oshift == 1) { 252 int nz = a->i[n]-1; 253 for (i=0; i<nz; i++) a->j[i]--; 254 for (i=0; i<n+1; i++) a->i[i]--; 255 } 256 PetscFunctionReturn(0); 257 } 258 259 #undef __FUNCT__ 260 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 261 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 262 { 263 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 264 265 PetscFunctionBegin; 266 *bs = baij->bs; 267 PetscFunctionReturn(0); 268 } 269 270 271 #undef __FUNCT__ 272 #define __FUNCT__ "MatDestroy_SeqBAIJ" 273 int MatDestroy_SeqBAIJ(Mat A) 274 { 275 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 276 int ierr; 277 278 PetscFunctionBegin; 279 #if defined(PETSC_USE_LOG) 280 PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 281 #endif 282 ierr = PetscFree(a->a);CHKERRQ(ierr); 283 if (!a->singlemalloc) { 284 ierr = PetscFree(a->i);CHKERRQ(ierr); 285 ierr = PetscFree(a->j);CHKERRQ(ierr); 286 } 287 if (a->row) { 288 ierr = ISDestroy(a->row);CHKERRQ(ierr); 289 } 290 if (a->col) { 291 ierr = ISDestroy(a->col);CHKERRQ(ierr); 292 } 293 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 294 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 295 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 296 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 297 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 298 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 299 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 300 #if defined(PETSC_USE_MAT_SINGLE) 301 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 302 #endif 303 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 304 305 ierr = PetscFree(a);CHKERRQ(ierr); 306 PetscFunctionReturn(0); 307 } 308 309 #undef __FUNCT__ 310 #define __FUNCT__ "MatSetOption_SeqBAIJ" 311 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 312 { 313 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 314 315 PetscFunctionBegin; 316 switch (op) { 317 case MAT_ROW_ORIENTED: 318 a->roworiented = PETSC_TRUE; 319 break; 320 case MAT_COLUMN_ORIENTED: 321 a->roworiented = PETSC_FALSE; 322 break; 323 case MAT_COLUMNS_SORTED: 324 a->sorted = PETSC_TRUE; 325 break; 326 case MAT_COLUMNS_UNSORTED: 327 a->sorted = PETSC_FALSE; 328 break; 329 case MAT_KEEP_ZEROED_ROWS: 330 a->keepzeroedrows = PETSC_TRUE; 331 break; 332 case MAT_NO_NEW_NONZERO_LOCATIONS: 333 a->nonew = 1; 334 break; 335 case MAT_NEW_NONZERO_LOCATION_ERR: 336 a->nonew = -1; 337 break; 338 case MAT_NEW_NONZERO_ALLOCATION_ERR: 339 a->nonew = -2; 340 break; 341 case MAT_YES_NEW_NONZERO_LOCATIONS: 342 a->nonew = 0; 343 break; 344 case MAT_ROWS_SORTED: 345 case MAT_ROWS_UNSORTED: 346 case MAT_YES_NEW_DIAGONALS: 347 case MAT_IGNORE_OFF_PROC_ENTRIES: 348 case MAT_USE_HASH_TABLE: 349 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 350 break; 351 case MAT_NO_NEW_DIAGONALS: 352 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 353 default: 354 SETERRQ(PETSC_ERR_SUP,"unknown option"); 355 } 356 PetscFunctionReturn(0); 357 } 358 359 #undef __FUNCT__ 360 #define __FUNCT__ "MatGetRow_SeqBAIJ" 361 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 362 { 363 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 364 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 365 MatScalar *aa,*aa_i; 366 PetscScalar *v_i; 367 368 PetscFunctionBegin; 369 bs = a->bs; 370 ai = a->i; 371 aj = a->j; 372 aa = a->a; 373 bs2 = a->bs2; 374 375 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d out of range", row); 376 377 bn = row/bs; /* Block number */ 378 bp = row % bs; /* Block Position */ 379 M = ai[bn+1] - ai[bn]; 380 *nz = bs*M; 381 382 if (v) { 383 *v = 0; 384 if (*nz) { 385 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 386 for (i=0; i<M; i++) { /* for each block in the block row */ 387 v_i = *v + i*bs; 388 aa_i = aa + bs2*(ai[bn] + i); 389 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 390 } 391 } 392 } 393 394 if (idx) { 395 *idx = 0; 396 if (*nz) { 397 ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 398 for (i=0; i<M; i++) { /* for each block in the block row */ 399 idx_i = *idx + i*bs; 400 itmp = bs*aj[ai[bn] + i]; 401 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 402 } 403 } 404 } 405 PetscFunctionReturn(0); 406 } 407 408 #undef __FUNCT__ 409 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 410 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 411 { 412 int ierr; 413 414 PetscFunctionBegin; 415 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 416 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 417 PetscFunctionReturn(0); 418 } 419 420 #undef __FUNCT__ 421 #define __FUNCT__ "MatTranspose_SeqBAIJ" 422 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 423 { 424 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 425 Mat C; 426 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 427 int *rows,*cols,bs2=a->bs2; 428 PetscScalar *array; 429 430 PetscFunctionBegin; 431 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 432 ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 433 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 434 435 #if defined(PETSC_USE_MAT_SINGLE) 436 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 437 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 438 #else 439 array = a->a; 440 #endif 441 442 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 443 ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 444 ierr = PetscFree(col);CHKERRQ(ierr); 445 ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 446 cols = rows + bs; 447 for (i=0; i<mbs; i++) { 448 cols[0] = i*bs; 449 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 450 len = ai[i+1] - ai[i]; 451 for (j=0; j<len; j++) { 452 rows[0] = (*aj++)*bs; 453 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 454 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 455 array += bs2; 456 } 457 } 458 ierr = PetscFree(rows);CHKERRQ(ierr); 459 #if defined(PETSC_USE_MAT_SINGLE) 460 ierr = PetscFree(array); 461 #endif 462 463 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 464 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 465 466 if (B) { 467 *B = C; 468 } else { 469 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 470 } 471 PetscFunctionReturn(0); 472 } 473 474 #undef __FUNCT__ 475 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 476 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 477 { 478 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 479 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 480 PetscScalar *aa; 481 FILE *file; 482 483 PetscFunctionBegin; 484 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 485 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 486 col_lens[0] = MAT_FILE_COOKIE; 487 488 col_lens[1] = A->m; 489 col_lens[2] = A->n; 490 col_lens[3] = a->nz*bs2; 491 492 /* store lengths of each row and write (including header) to file */ 493 count = 0; 494 for (i=0; i<a->mbs; i++) { 495 for (j=0; j<bs; j++) { 496 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 497 } 498 } 499 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 500 ierr = PetscFree(col_lens);CHKERRQ(ierr); 501 502 /* store column indices (zero start index) */ 503 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 504 count = 0; 505 for (i=0; i<a->mbs; i++) { 506 for (j=0; j<bs; j++) { 507 for (k=a->i[i]; k<a->i[i+1]; k++) { 508 for (l=0; l<bs; l++) { 509 jj[count++] = bs*a->j[k] + l; 510 } 511 } 512 } 513 } 514 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 515 ierr = PetscFree(jj);CHKERRQ(ierr); 516 517 /* store nonzero values */ 518 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 519 count = 0; 520 for (i=0; i<a->mbs; i++) { 521 for (j=0; j<bs; j++) { 522 for (k=a->i[i]; k<a->i[i+1]; k++) { 523 for (l=0; l<bs; l++) { 524 aa[count++] = a->a[bs2*k + l*bs + j]; 525 } 526 } 527 } 528 } 529 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 530 ierr = PetscFree(aa);CHKERRQ(ierr); 531 532 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 533 if (file) { 534 fprintf(file,"-matload_block_size %d\n",a->bs); 535 } 536 PetscFunctionReturn(0); 537 } 538 539 #undef __FUNCT__ 540 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 541 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 542 { 543 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 544 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 545 PetscViewerFormat format; 546 547 PetscFunctionBegin; 548 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 549 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 550 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 551 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 552 Mat aij; 553 ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr); 554 ierr = MatView(aij,viewer);CHKERRQ(ierr); 555 ierr = MatDestroy(aij);CHKERRQ(ierr); 556 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 557 PetscFunctionReturn(0); 558 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 559 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 560 for (i=0; i<a->mbs; i++) { 561 for (j=0; j<bs; j++) { 562 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 563 for (k=a->i[i]; k<a->i[i+1]; k++) { 564 for (l=0; l<bs; l++) { 565 #if defined(PETSC_USE_COMPLEX) 566 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 567 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l, 568 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 569 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 570 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l, 571 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 572 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 573 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 574 } 575 #else 576 if (a->a[bs2*k + l*bs + j] != 0.0) { 577 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 578 } 579 #endif 580 } 581 } 582 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 583 } 584 } 585 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 586 } else { 587 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 588 for (i=0; i<a->mbs; i++) { 589 for (j=0; j<bs; j++) { 590 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 591 for (k=a->i[i]; k<a->i[i+1]; k++) { 592 for (l=0; l<bs; l++) { 593 #if defined(PETSC_USE_COMPLEX) 594 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 595 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 596 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 597 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 598 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 599 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 600 } else { 601 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 602 } 603 #else 604 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 605 #endif 606 } 607 } 608 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 609 } 610 } 611 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 612 } 613 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 614 PetscFunctionReturn(0); 615 } 616 617 #undef __FUNCT__ 618 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 619 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 620 { 621 Mat A = (Mat) Aa; 622 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 623 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 624 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 625 MatScalar *aa; 626 PetscViewer viewer; 627 628 PetscFunctionBegin; 629 630 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 631 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 632 633 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 634 635 /* loop over matrix elements drawing boxes */ 636 color = PETSC_DRAW_BLUE; 637 for (i=0,row=0; i<mbs; i++,row+=bs) { 638 for (j=a->i[i]; j<a->i[i+1]; j++) { 639 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 640 x_l = a->j[j]*bs; x_r = x_l + 1.0; 641 aa = a->a + j*bs2; 642 for (k=0; k<bs; k++) { 643 for (l=0; l<bs; l++) { 644 if (PetscRealPart(*aa++) >= 0.) continue; 645 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 646 } 647 } 648 } 649 } 650 color = PETSC_DRAW_CYAN; 651 for (i=0,row=0; i<mbs; i++,row+=bs) { 652 for (j=a->i[i]; j<a->i[i+1]; j++) { 653 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 654 x_l = a->j[j]*bs; x_r = x_l + 1.0; 655 aa = a->a + j*bs2; 656 for (k=0; k<bs; k++) { 657 for (l=0; l<bs; l++) { 658 if (PetscRealPart(*aa++) != 0.) continue; 659 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 660 } 661 } 662 } 663 } 664 665 color = PETSC_DRAW_RED; 666 for (i=0,row=0; i<mbs; i++,row+=bs) { 667 for (j=a->i[i]; j<a->i[i+1]; j++) { 668 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 669 x_l = a->j[j]*bs; x_r = x_l + 1.0; 670 aa = a->a + j*bs2; 671 for (k=0; k<bs; k++) { 672 for (l=0; l<bs; l++) { 673 if (PetscRealPart(*aa++) <= 0.) continue; 674 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 675 } 676 } 677 } 678 } 679 PetscFunctionReturn(0); 680 } 681 682 #undef __FUNCT__ 683 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 684 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 685 { 686 int ierr; 687 PetscReal xl,yl,xr,yr,w,h; 688 PetscDraw draw; 689 PetscTruth isnull; 690 691 PetscFunctionBegin; 692 693 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 694 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 695 696 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 697 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 698 xr += w; yr += h; xl = -w; yl = -h; 699 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 700 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 701 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 702 PetscFunctionReturn(0); 703 } 704 705 #undef __FUNCT__ 706 #define __FUNCT__ "MatView_SeqBAIJ" 707 int MatView_SeqBAIJ(Mat A,PetscViewer viewer) 708 { 709 int ierr; 710 PetscTruth issocket,isascii,isbinary,isdraw; 711 712 PetscFunctionBegin; 713 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 714 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 715 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 716 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 717 if (issocket) { 718 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 719 } else if (isascii){ 720 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 721 } else if (isbinary) { 722 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 723 } else if (isdraw) { 724 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 725 } else { 726 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 727 } 728 PetscFunctionReturn(0); 729 } 730 731 732 #undef __FUNCT__ 733 #define __FUNCT__ "MatGetValues_SeqBAIJ" 734 int MatGetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[]) 735 { 736 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 737 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 738 int *ai = a->i,*ailen = a->ilen; 739 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 740 MatScalar *ap,*aa = a->a,zero = 0.0; 741 742 PetscFunctionBegin; 743 for (k=0; k<m; k++) { /* loop over rows */ 744 row = im[k]; brow = row/bs; 745 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 746 if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d too large", row); 747 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 748 nrow = ailen[brow]; 749 for (l=0; l<n; l++) { /* loop over columns */ 750 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 751 if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %d too large", in[l]); 752 col = in[l] ; 753 bcol = col/bs; 754 cidx = col%bs; 755 ridx = row%bs; 756 high = nrow; 757 low = 0; /* assume unsorted */ 758 while (high-low > 5) { 759 t = (low+high)/2; 760 if (rp[t] > bcol) high = t; 761 else low = t; 762 } 763 for (i=low; i<high; i++) { 764 if (rp[i] > bcol) break; 765 if (rp[i] == bcol) { 766 *v++ = ap[bs2*i+bs*cidx+ridx]; 767 goto finished; 768 } 769 } 770 *v++ = zero; 771 finished:; 772 } 773 } 774 PetscFunctionReturn(0); 775 } 776 777 #if defined(PETSC_USE_MAT_SINGLE) 778 #undef __FUNCT__ 779 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 780 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 781 { 782 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 783 int ierr,i,N = m*n*b->bs2; 784 MatScalar *vsingle; 785 786 PetscFunctionBegin; 787 if (N > b->setvalueslen) { 788 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 789 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 790 b->setvalueslen = N; 791 } 792 vsingle = b->setvaluescopy; 793 for (i=0; i<N; i++) { 794 vsingle[i] = v[i]; 795 } 796 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 797 PetscFunctionReturn(0); 798 } 799 #endif 800 801 802 #undef __FUNCT__ 803 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 804 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode is) 805 { 806 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 807 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 808 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 809 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 810 PetscTruth roworiented=a->roworiented; 811 const MatScalar *value = v; 812 MatScalar *ap,*aa = a->a,*bap; 813 814 PetscFunctionBegin; 815 if (roworiented) { 816 stepval = (n-1)*bs; 817 } else { 818 stepval = (m-1)*bs; 819 } 820 for (k=0; k<m; k++) { /* loop over added rows */ 821 row = im[k]; 822 if (row < 0) continue; 823 #if defined(PETSC_USE_BOPT_g) 824 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1); 825 #endif 826 rp = aj + ai[row]; 827 ap = aa + bs2*ai[row]; 828 rmax = imax[row]; 829 nrow = ailen[row]; 830 low = 0; 831 for (l=0; l<n; l++) { /* loop over added columns */ 832 if (in[l] < 0) continue; 833 #if defined(PETSC_USE_BOPT_g) 834 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],a->nbs-1); 835 #endif 836 col = in[l]; 837 if (roworiented) { 838 value = v + k*(stepval+bs)*bs + l*bs; 839 } else { 840 value = v + l*(stepval+bs)*bs + k*bs; 841 } 842 if (!sorted) low = 0; high = nrow; 843 while (high-low > 7) { 844 t = (low+high)/2; 845 if (rp[t] > col) high = t; 846 else low = t; 847 } 848 for (i=low; i<high; i++) { 849 if (rp[i] > col) break; 850 if (rp[i] == col) { 851 bap = ap + bs2*i; 852 if (roworiented) { 853 if (is == ADD_VALUES) { 854 for (ii=0; ii<bs; ii++,value+=stepval) { 855 for (jj=ii; jj<bs2; jj+=bs) { 856 bap[jj] += *value++; 857 } 858 } 859 } else { 860 for (ii=0; ii<bs; ii++,value+=stepval) { 861 for (jj=ii; jj<bs2; jj+=bs) { 862 bap[jj] = *value++; 863 } 864 } 865 } 866 } else { 867 if (is == ADD_VALUES) { 868 for (ii=0; ii<bs; ii++,value+=stepval) { 869 for (jj=0; jj<bs; jj++) { 870 *bap++ += *value++; 871 } 872 } 873 } else { 874 for (ii=0; ii<bs; ii++,value+=stepval) { 875 for (jj=0; jj<bs; jj++) { 876 *bap++ = *value++; 877 } 878 } 879 } 880 } 881 goto noinsert2; 882 } 883 } 884 if (nonew == 1) goto noinsert2; 885 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 886 if (nrow >= rmax) { 887 /* there is no extra room in row, therefore enlarge */ 888 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 889 MatScalar *new_a; 890 891 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 892 893 /* malloc new storage space */ 894 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 895 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 896 new_j = (int*)(new_a + bs2*new_nz); 897 new_i = new_j + new_nz; 898 899 /* copy over old data into new slots */ 900 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 901 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 902 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 903 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 904 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 905 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 906 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 907 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 908 /* free up old matrix storage */ 909 ierr = PetscFree(a->a);CHKERRQ(ierr); 910 if (!a->singlemalloc) { 911 ierr = PetscFree(a->i);CHKERRQ(ierr); 912 ierr = PetscFree(a->j);CHKERRQ(ierr); 913 } 914 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 915 a->singlemalloc = PETSC_TRUE; 916 917 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 918 rmax = imax[row] = imax[row] + CHUNKSIZE; 919 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 920 a->maxnz += bs2*CHUNKSIZE; 921 a->reallocs++; 922 a->nz++; 923 } 924 N = nrow++ - 1; 925 /* shift up all the later entries in this row */ 926 for (ii=N; ii>=i; ii--) { 927 rp[ii+1] = rp[ii]; 928 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 929 } 930 if (N >= i) { 931 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 932 } 933 rp[i] = col; 934 bap = ap + bs2*i; 935 if (roworiented) { 936 for (ii=0; ii<bs; ii++,value+=stepval) { 937 for (jj=ii; jj<bs2; jj+=bs) { 938 bap[jj] = *value++; 939 } 940 } 941 } else { 942 for (ii=0; ii<bs; ii++,value+=stepval) { 943 for (jj=0; jj<bs; jj++) { 944 *bap++ = *value++; 945 } 946 } 947 } 948 noinsert2:; 949 low = i; 950 } 951 ailen[row] = nrow; 952 } 953 PetscFunctionReturn(0); 954 } 955 956 #undef __FUNCT__ 957 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 958 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 959 { 960 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 961 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 962 int m = A->m,*ip,N,*ailen = a->ilen; 963 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 964 MatScalar *aa = a->a,*ap; 965 966 PetscFunctionBegin; 967 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 968 969 if (m) rmax = ailen[0]; 970 for (i=1; i<mbs; i++) { 971 /* move each row back by the amount of empty slots (fshift) before it*/ 972 fshift += imax[i-1] - ailen[i-1]; 973 rmax = PetscMax(rmax,ailen[i]); 974 if (fshift) { 975 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 976 N = ailen[i]; 977 for (j=0; j<N; j++) { 978 ip[j-fshift] = ip[j]; 979 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 980 } 981 } 982 ai[i] = ai[i-1] + ailen[i-1]; 983 } 984 if (mbs) { 985 fshift += imax[mbs-1] - ailen[mbs-1]; 986 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 987 } 988 /* reset ilen and imax for each row */ 989 for (i=0; i<mbs; i++) { 990 ailen[i] = imax[i] = ai[i+1] - ai[i]; 991 } 992 a->nz = ai[mbs]; 993 994 /* diagonals may have moved, so kill the diagonal pointers */ 995 if (fshift && a->diag) { 996 ierr = PetscFree(a->diag);CHKERRQ(ierr); 997 PetscLogObjectMemory(A,-(mbs+1)*sizeof(int)); 998 a->diag = 0; 999 } 1000 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2); 1001 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs); 1002 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 1003 a->reallocs = 0; 1004 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1005 1006 PetscFunctionReturn(0); 1007 } 1008 1009 1010 1011 /* 1012 This function returns an array of flags which indicate the locations of contiguous 1013 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1014 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1015 Assume: sizes should be long enough to hold all the values. 1016 */ 1017 #undef __FUNCT__ 1018 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1019 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 1020 { 1021 int i,j,k,row; 1022 PetscTruth flg; 1023 1024 PetscFunctionBegin; 1025 for (i=0,j=0; i<n; j++) { 1026 row = idx[i]; 1027 if (row%bs!=0) { /* Not the begining of a block */ 1028 sizes[j] = 1; 1029 i++; 1030 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1031 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1032 i++; 1033 } else { /* Begining of the block, so check if the complete block exists */ 1034 flg = PETSC_TRUE; 1035 for (k=1; k<bs; k++) { 1036 if (row+k != idx[i+k]) { /* break in the block */ 1037 flg = PETSC_FALSE; 1038 break; 1039 } 1040 } 1041 if (flg == PETSC_TRUE) { /* No break in the bs */ 1042 sizes[j] = bs; 1043 i+= bs; 1044 } else { 1045 sizes[j] = 1; 1046 i++; 1047 } 1048 } 1049 } 1050 *bs_max = j; 1051 PetscFunctionReturn(0); 1052 } 1053 1054 #undef __FUNCT__ 1055 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1056 int MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag) 1057 { 1058 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1059 int ierr,i,j,k,count,is_n,*is_idx,*rows; 1060 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 1061 PetscScalar zero = 0.0; 1062 MatScalar *aa; 1063 1064 PetscFunctionBegin; 1065 /* Make a copy of the IS and sort it */ 1066 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 1067 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1068 1069 /* allocate memory for rows,sizes */ 1070 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 1071 sizes = rows + is_n; 1072 1073 /* copy IS values to rows, and sort them */ 1074 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1075 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1076 if (baij->keepzeroedrows) { 1077 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1078 bs_max = is_n; 1079 } else { 1080 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1081 } 1082 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 1083 1084 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1085 row = rows[j]; 1086 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 1087 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1088 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1089 if (sizes[i] == bs && !baij->keepzeroedrows) { 1090 if (diag) { 1091 if (baij->ilen[row/bs] > 0) { 1092 baij->ilen[row/bs] = 1; 1093 baij->j[baij->i[row/bs]] = row/bs; 1094 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1095 } 1096 /* Now insert all the diagonal values for this bs */ 1097 for (k=0; k<bs; k++) { 1098 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 1099 } 1100 } else { /* (!diag) */ 1101 baij->ilen[row/bs] = 0; 1102 } /* end (!diag) */ 1103 } else { /* (sizes[i] != bs) */ 1104 #if defined (PETSC_USE_DEBUG) 1105 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 1106 #endif 1107 for (k=0; k<count; k++) { 1108 aa[0] = zero; 1109 aa += bs; 1110 } 1111 if (diag) { 1112 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1113 } 1114 } 1115 } 1116 1117 ierr = PetscFree(rows);CHKERRQ(ierr); 1118 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1119 PetscFunctionReturn(0); 1120 } 1121 1122 #undef __FUNCT__ 1123 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1124 int MatSetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 1125 { 1126 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1127 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1128 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1129 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1130 int ridx,cidx,bs2=a->bs2,ierr; 1131 PetscTruth roworiented=a->roworiented; 1132 MatScalar *ap,value,*aa=a->a,*bap; 1133 1134 PetscFunctionBegin; 1135 for (k=0; k<m; k++) { /* loop over added rows */ 1136 row = im[k]; brow = row/bs; 1137 if (row < 0) continue; 1138 #if defined(PETSC_USE_BOPT_g) 1139 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1); 1140 #endif 1141 rp = aj + ai[brow]; 1142 ap = aa + bs2*ai[brow]; 1143 rmax = imax[brow]; 1144 nrow = ailen[brow]; 1145 low = 0; 1146 for (l=0; l<n; l++) { /* loop over added columns */ 1147 if (in[l] < 0) continue; 1148 #if defined(PETSC_USE_BOPT_g) 1149 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1); 1150 #endif 1151 col = in[l]; bcol = col/bs; 1152 ridx = row % bs; cidx = col % bs; 1153 if (roworiented) { 1154 value = v[l + k*n]; 1155 } else { 1156 value = v[k + l*m]; 1157 } 1158 if (!sorted) low = 0; high = nrow; 1159 while (high-low > 7) { 1160 t = (low+high)/2; 1161 if (rp[t] > bcol) high = t; 1162 else low = t; 1163 } 1164 for (i=low; i<high; i++) { 1165 if (rp[i] > bcol) break; 1166 if (rp[i] == bcol) { 1167 bap = ap + bs2*i + bs*cidx + ridx; 1168 if (is == ADD_VALUES) *bap += value; 1169 else *bap = value; 1170 goto noinsert1; 1171 } 1172 } 1173 if (nonew == 1) goto noinsert1; 1174 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 1175 if (nrow >= rmax) { 1176 /* there is no extra room in row, therefore enlarge */ 1177 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1178 MatScalar *new_a; 1179 1180 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 1181 1182 /* Malloc new storage space */ 1183 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1184 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1185 new_j = (int*)(new_a + bs2*new_nz); 1186 new_i = new_j + new_nz; 1187 1188 /* copy over old data into new slots */ 1189 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1190 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1191 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1192 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1193 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1194 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1195 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1196 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1197 /* free up old matrix storage */ 1198 ierr = PetscFree(a->a);CHKERRQ(ierr); 1199 if (!a->singlemalloc) { 1200 ierr = PetscFree(a->i);CHKERRQ(ierr); 1201 ierr = PetscFree(a->j);CHKERRQ(ierr); 1202 } 1203 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1204 a->singlemalloc = PETSC_TRUE; 1205 1206 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1207 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1208 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1209 a->maxnz += bs2*CHUNKSIZE; 1210 a->reallocs++; 1211 a->nz++; 1212 } 1213 N = nrow++ - 1; 1214 /* shift up all the later entries in this row */ 1215 for (ii=N; ii>=i; ii--) { 1216 rp[ii+1] = rp[ii]; 1217 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1218 } 1219 if (N>=i) { 1220 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1221 } 1222 rp[i] = bcol; 1223 ap[bs2*i + bs*cidx + ridx] = value; 1224 noinsert1:; 1225 low = i; 1226 } 1227 ailen[brow] = nrow; 1228 } 1229 PetscFunctionReturn(0); 1230 } 1231 1232 1233 #undef __FUNCT__ 1234 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1235 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1236 { 1237 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1238 Mat outA; 1239 int ierr; 1240 PetscTruth row_identity,col_identity; 1241 1242 PetscFunctionBegin; 1243 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1244 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1245 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1246 if (!row_identity || !col_identity) { 1247 SETERRQ(1,"Row and column permutations must be identity for in-place ILU"); 1248 } 1249 1250 outA = inA; 1251 inA->factor = FACTOR_LU; 1252 1253 if (!a->diag) { 1254 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1255 } 1256 1257 a->row = row; 1258 a->col = col; 1259 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1260 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1261 1262 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1263 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1264 PetscLogObjectParent(inA,a->icol); 1265 1266 /* 1267 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1268 for ILU(0) factorization with natural ordering 1269 */ 1270 if (a->bs < 8) { 1271 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1272 } else { 1273 if (!a->solve_work) { 1274 ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1275 PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar)); 1276 } 1277 } 1278 1279 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1280 1281 PetscFunctionReturn(0); 1282 } 1283 #undef __FUNCT__ 1284 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1285 int MatPrintHelp_SeqBAIJ(Mat A) 1286 { 1287 static PetscTruth called = PETSC_FALSE; 1288 MPI_Comm comm = A->comm; 1289 int ierr; 1290 1291 PetscFunctionBegin; 1292 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1293 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1294 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1295 PetscFunctionReturn(0); 1296 } 1297 1298 EXTERN_C_BEGIN 1299 #undef __FUNCT__ 1300 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1301 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1302 { 1303 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1304 int i,nz,nbs; 1305 1306 PetscFunctionBegin; 1307 nz = baij->maxnz/baij->bs2; 1308 nbs = baij->nbs; 1309 for (i=0; i<nz; i++) { 1310 baij->j[i] = indices[i]; 1311 } 1312 baij->nz = nz; 1313 for (i=0; i<nbs; i++) { 1314 baij->ilen[i] = baij->imax[i]; 1315 } 1316 1317 PetscFunctionReturn(0); 1318 } 1319 EXTERN_C_END 1320 1321 #undef __FUNCT__ 1322 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1323 /*@ 1324 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1325 in the matrix. 1326 1327 Input Parameters: 1328 + mat - the SeqBAIJ matrix 1329 - indices - the column indices 1330 1331 Level: advanced 1332 1333 Notes: 1334 This can be called if you have precomputed the nonzero structure of the 1335 matrix and want to provide it to the matrix object to improve the performance 1336 of the MatSetValues() operation. 1337 1338 You MUST have set the correct numbers of nonzeros per row in the call to 1339 MatCreateSeqBAIJ(). 1340 1341 MUST be called before any calls to MatSetValues(); 1342 1343 @*/ 1344 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1345 { 1346 int ierr,(*f)(Mat,int *); 1347 1348 PetscFunctionBegin; 1349 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1350 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1351 if (f) { 1352 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1353 } else { 1354 SETERRQ(1,"Wrong type of matrix to set column indices"); 1355 } 1356 PetscFunctionReturn(0); 1357 } 1358 1359 #undef __FUNCT__ 1360 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1361 int MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1362 { 1363 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1364 int ierr,i,j,n,row,bs,*ai,*aj,mbs; 1365 PetscReal atmp; 1366 PetscScalar *x,zero = 0.0; 1367 MatScalar *aa; 1368 int ncols,brow,krow,kcol; 1369 1370 PetscFunctionBegin; 1371 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1372 bs = a->bs; 1373 aa = a->a; 1374 ai = a->i; 1375 aj = a->j; 1376 mbs = a->mbs; 1377 1378 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1379 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1380 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1381 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1382 for (i=0; i<mbs; i++) { 1383 ncols = ai[1] - ai[0]; ai++; 1384 brow = bs*i; 1385 for (j=0; j<ncols; j++){ 1386 /* bcol = bs*(*aj); */ 1387 for (kcol=0; kcol<bs; kcol++){ 1388 for (krow=0; krow<bs; krow++){ 1389 atmp = PetscAbsScalar(*aa); aa++; 1390 row = brow + krow; /* row index */ 1391 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1392 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1393 } 1394 } 1395 aj++; 1396 } 1397 } 1398 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1399 PetscFunctionReturn(0); 1400 } 1401 1402 #undef __FUNCT__ 1403 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1404 int MatSetUpPreallocation_SeqBAIJ(Mat A) 1405 { 1406 int ierr; 1407 1408 PetscFunctionBegin; 1409 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1410 PetscFunctionReturn(0); 1411 } 1412 1413 #undef __FUNCT__ 1414 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1415 int MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1416 { 1417 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1418 PetscFunctionBegin; 1419 *array = a->a; 1420 PetscFunctionReturn(0); 1421 } 1422 1423 #undef __FUNCT__ 1424 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1425 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1426 { 1427 PetscFunctionBegin; 1428 PetscFunctionReturn(0); 1429 } 1430 1431 #include "petscblaslapack.h" 1432 #undef __FUNCT__ 1433 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1434 int MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1435 { 1436 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1437 int ierr,one=1,i,bs=y->bs,j,bs2; 1438 1439 PetscFunctionBegin; 1440 if (str == SAME_NONZERO_PATTERN) { 1441 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1442 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1443 if (y->xtoy && y->XtoY != X) { 1444 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1445 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1446 } 1447 if (!y->xtoy) { /* get xtoy */ 1448 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1449 y->XtoY = X; 1450 } 1451 bs2 = bs*bs; 1452 for (i=0; i<x->nz; i++) { 1453 j = 0; 1454 while (j < bs2){ 1455 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1456 j++; 1457 } 1458 } 1459 PetscLogInfo(0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)); 1460 } else { 1461 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1462 } 1463 PetscFunctionReturn(0); 1464 } 1465 1466 /* -------------------------------------------------------------------*/ 1467 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1468 MatGetRow_SeqBAIJ, 1469 MatRestoreRow_SeqBAIJ, 1470 MatMult_SeqBAIJ_N, 1471 /* 4*/ MatMultAdd_SeqBAIJ_N, 1472 MatMultTranspose_SeqBAIJ, 1473 MatMultTransposeAdd_SeqBAIJ, 1474 MatSolve_SeqBAIJ_N, 1475 0, 1476 0, 1477 /*10*/ 0, 1478 MatLUFactor_SeqBAIJ, 1479 0, 1480 0, 1481 MatTranspose_SeqBAIJ, 1482 /*15*/ MatGetInfo_SeqBAIJ, 1483 MatEqual_SeqBAIJ, 1484 MatGetDiagonal_SeqBAIJ, 1485 MatDiagonalScale_SeqBAIJ, 1486 MatNorm_SeqBAIJ, 1487 /*20*/ 0, 1488 MatAssemblyEnd_SeqBAIJ, 1489 0, 1490 MatSetOption_SeqBAIJ, 1491 MatZeroEntries_SeqBAIJ, 1492 /*25*/ MatZeroRows_SeqBAIJ, 1493 MatLUFactorSymbolic_SeqBAIJ, 1494 MatLUFactorNumeric_SeqBAIJ_N, 1495 0, 1496 0, 1497 /*30*/ MatSetUpPreallocation_SeqBAIJ, 1498 MatILUFactorSymbolic_SeqBAIJ, 1499 0, 1500 MatGetArray_SeqBAIJ, 1501 MatRestoreArray_SeqBAIJ, 1502 /*35*/ MatDuplicate_SeqBAIJ, 1503 0, 1504 0, 1505 MatILUFactor_SeqBAIJ, 1506 0, 1507 /*40*/ MatAXPY_SeqBAIJ, 1508 MatGetSubMatrices_SeqBAIJ, 1509 MatIncreaseOverlap_SeqBAIJ, 1510 MatGetValues_SeqBAIJ, 1511 0, 1512 /*45*/ MatPrintHelp_SeqBAIJ, 1513 MatScale_SeqBAIJ, 1514 0, 1515 0, 1516 0, 1517 /*50*/ MatGetBlockSize_SeqBAIJ, 1518 MatGetRowIJ_SeqBAIJ, 1519 MatRestoreRowIJ_SeqBAIJ, 1520 0, 1521 0, 1522 /*55*/ 0, 1523 0, 1524 0, 1525 0, 1526 MatSetValuesBlocked_SeqBAIJ, 1527 /*60*/ MatGetSubMatrix_SeqBAIJ, 1528 MatDestroy_SeqBAIJ, 1529 MatView_SeqBAIJ, 1530 MatGetPetscMaps_Petsc, 1531 0, 1532 /*65*/ 0, 1533 0, 1534 0, 1535 0, 1536 0, 1537 /*70*/ MatGetRowMax_SeqBAIJ, 1538 MatConvert_Basic, 1539 0, 1540 0, 1541 0, 1542 /*75*/ 0, 1543 0, 1544 0, 1545 0, 1546 0, 1547 /*80*/ 0, 1548 0, 1549 0, 1550 0, 1551 0, 1552 /*85*/ MatLoad_SeqBAIJ 1553 }; 1554 1555 EXTERN_C_BEGIN 1556 #undef __FUNCT__ 1557 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 1558 int MatStoreValues_SeqBAIJ(Mat mat) 1559 { 1560 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1561 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1562 int ierr; 1563 1564 PetscFunctionBegin; 1565 if (aij->nonew != 1) { 1566 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1567 } 1568 1569 /* allocate space for values if not already there */ 1570 if (!aij->saved_values) { 1571 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1572 } 1573 1574 /* copy values over */ 1575 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1576 PetscFunctionReturn(0); 1577 } 1578 EXTERN_C_END 1579 1580 EXTERN_C_BEGIN 1581 #undef __FUNCT__ 1582 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 1583 int MatRetrieveValues_SeqBAIJ(Mat mat) 1584 { 1585 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1586 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1587 1588 PetscFunctionBegin; 1589 if (aij->nonew != 1) { 1590 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1591 } 1592 if (!aij->saved_values) { 1593 SETERRQ(1,"Must call MatStoreValues(A);first"); 1594 } 1595 1596 /* copy values over */ 1597 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1598 PetscFunctionReturn(0); 1599 } 1600 EXTERN_C_END 1601 1602 EXTERN_C_BEGIN 1603 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*); 1604 EXTERN_C_END 1605 1606 EXTERN_C_BEGIN 1607 #undef __FUNCT__ 1608 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 1609 int MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,int bs,int nz,int *nnz) 1610 { 1611 Mat_SeqBAIJ *b; 1612 int i,len,ierr,mbs,nbs,bs2,newbs = bs; 1613 PetscTruth flg; 1614 1615 PetscFunctionBegin; 1616 1617 B->preallocated = PETSC_TRUE; 1618 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 1619 if (nnz && newbs != bs) { 1620 SETERRQ(1,"Cannot change blocksize from command line if setting nnz"); 1621 } 1622 bs = newbs; 1623 1624 mbs = B->m/bs; 1625 nbs = B->n/bs; 1626 bs2 = bs*bs; 1627 1628 if (mbs*bs!=B->m || nbs*bs!=B->n) { 1629 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 1630 } 1631 1632 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1633 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1634 if (nnz) { 1635 for (i=0; i<mbs; i++) { 1636 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1637 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 1638 } 1639 } 1640 1641 b = (Mat_SeqBAIJ*)B->data; 1642 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1643 B->ops->solve = MatSolve_SeqBAIJ_Update; 1644 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 1645 if (!flg) { 1646 switch (bs) { 1647 case 1: 1648 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1649 B->ops->mult = MatMult_SeqBAIJ_1; 1650 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 1651 break; 1652 case 2: 1653 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1654 B->ops->mult = MatMult_SeqBAIJ_2; 1655 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 1656 break; 1657 case 3: 1658 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1659 B->ops->mult = MatMult_SeqBAIJ_3; 1660 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 1661 break; 1662 case 4: 1663 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1664 B->ops->mult = MatMult_SeqBAIJ_4; 1665 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 1666 break; 1667 case 5: 1668 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1669 B->ops->mult = MatMult_SeqBAIJ_5; 1670 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 1671 break; 1672 case 6: 1673 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 1674 B->ops->mult = MatMult_SeqBAIJ_6; 1675 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 1676 break; 1677 case 7: 1678 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 1679 B->ops->mult = MatMult_SeqBAIJ_7; 1680 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 1681 break; 1682 default: 1683 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 1684 B->ops->mult = MatMult_SeqBAIJ_N; 1685 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 1686 break; 1687 } 1688 } 1689 b->bs = bs; 1690 b->mbs = mbs; 1691 b->nbs = nbs; 1692 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1693 if (!nnz) { 1694 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1695 else if (nz <= 0) nz = 1; 1696 for (i=0; i<mbs; i++) b->imax[i] = nz; 1697 nz = nz*mbs; 1698 } else { 1699 nz = 0; 1700 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1701 } 1702 1703 /* allocate the matrix space */ 1704 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1705 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1706 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1707 b->j = (int*)(b->a + nz*bs2); 1708 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 1709 b->i = b->j + nz; 1710 b->singlemalloc = PETSC_TRUE; 1711 1712 b->i[0] = 0; 1713 for (i=1; i<mbs+1; i++) { 1714 b->i[i] = b->i[i-1] + b->imax[i-1]; 1715 } 1716 1717 /* b->ilen will count nonzeros in each block row so far. */ 1718 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1719 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1720 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1721 1722 b->bs = bs; 1723 b->bs2 = bs2; 1724 b->mbs = mbs; 1725 b->nz = 0; 1726 b->maxnz = nz*bs2; 1727 B->info.nz_unneeded = (PetscReal)b->maxnz; 1728 PetscFunctionReturn(0); 1729 } 1730 EXTERN_C_END 1731 1732 /*MC 1733 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 1734 block sparse compressed row format. 1735 1736 Options Database Keys: 1737 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 1738 1739 Level: beginner 1740 1741 .seealso: MatCreateSeqBAIJ 1742 M*/ 1743 1744 EXTERN_C_BEGIN 1745 #undef __FUNCT__ 1746 #define __FUNCT__ "MatCreate_SeqBAIJ" 1747 int MatCreate_SeqBAIJ(Mat B) 1748 { 1749 int ierr,size; 1750 Mat_SeqBAIJ *b; 1751 1752 PetscFunctionBegin; 1753 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1754 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1755 1756 B->m = B->M = PetscMax(B->m,B->M); 1757 B->n = B->N = PetscMax(B->n,B->N); 1758 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 1759 B->data = (void*)b; 1760 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1761 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1762 B->factor = 0; 1763 B->lupivotthreshold = 1.0; 1764 B->mapping = 0; 1765 b->row = 0; 1766 b->col = 0; 1767 b->icol = 0; 1768 b->reallocs = 0; 1769 b->saved_values = 0; 1770 #if defined(PETSC_USE_MAT_SINGLE) 1771 b->setvalueslen = 0; 1772 b->setvaluescopy = PETSC_NULL; 1773 #endif 1774 1775 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 1776 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 1777 1778 b->sorted = PETSC_FALSE; 1779 b->roworiented = PETSC_TRUE; 1780 b->nonew = 0; 1781 b->diag = 0; 1782 b->solve_work = 0; 1783 b->mult_work = 0; 1784 B->spptr = 0; 1785 B->info.nz_unneeded = (PetscReal)b->maxnz; 1786 b->keepzeroedrows = PETSC_FALSE; 1787 b->xtoy = 0; 1788 b->XtoY = 0; 1789 1790 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1791 "MatStoreValues_SeqBAIJ", 1792 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1793 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1794 "MatRetrieveValues_SeqBAIJ", 1795 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1796 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1797 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1798 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1799 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 1800 "MatConvert_SeqBAIJ_SeqAIJ", 1801 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 1802 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 1803 "MatSeqBAIJSetPreallocation_SeqBAIJ", 1804 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 1805 PetscFunctionReturn(0); 1806 } 1807 EXTERN_C_END 1808 1809 #undef __FUNCT__ 1810 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 1811 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1812 { 1813 Mat C; 1814 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 1815 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1816 1817 PetscFunctionBegin; 1818 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1819 1820 *B = 0; 1821 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1822 ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr); 1823 c = (Mat_SeqBAIJ*)C->data; 1824 1825 c->bs = a->bs; 1826 c->bs2 = a->bs2; 1827 c->mbs = a->mbs; 1828 c->nbs = a->nbs; 1829 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1830 1831 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1832 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1833 for (i=0; i<mbs; i++) { 1834 c->imax[i] = a->imax[i]; 1835 c->ilen[i] = a->ilen[i]; 1836 } 1837 1838 /* allocate the matrix space */ 1839 c->singlemalloc = PETSC_TRUE; 1840 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1841 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1842 c->j = (int*)(c->a + nz*bs2); 1843 c->i = c->j + nz; 1844 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1845 if (mbs > 0) { 1846 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1847 if (cpvalues == MAT_COPY_VALUES) { 1848 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1849 } else { 1850 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1851 } 1852 } 1853 1854 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1855 c->sorted = a->sorted; 1856 c->roworiented = a->roworiented; 1857 c->nonew = a->nonew; 1858 1859 if (a->diag) { 1860 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1861 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1862 for (i=0; i<mbs; i++) { 1863 c->diag[i] = a->diag[i]; 1864 } 1865 } else c->diag = 0; 1866 c->nz = a->nz; 1867 c->maxnz = a->maxnz; 1868 c->solve_work = 0; 1869 C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1870 c->mult_work = 0; 1871 C->preallocated = PETSC_TRUE; 1872 C->assembled = PETSC_TRUE; 1873 *B = C; 1874 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1875 PetscFunctionReturn(0); 1876 } 1877 1878 #undef __FUNCT__ 1879 #define __FUNCT__ "MatLoad_SeqBAIJ" 1880 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A) 1881 { 1882 Mat_SeqBAIJ *a; 1883 Mat B; 1884 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1885 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1886 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1887 int *masked,nmask,tmp,bs2,ishift; 1888 PetscScalar *aa; 1889 MPI_Comm comm = ((PetscObject)viewer)->comm; 1890 1891 PetscFunctionBegin; 1892 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1893 bs2 = bs*bs; 1894 1895 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1896 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1897 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1898 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1899 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1900 M = header[1]; N = header[2]; nz = header[3]; 1901 1902 if (header[3] < 0) { 1903 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 1904 } 1905 1906 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1907 1908 /* 1909 This code adds extra rows to make sure the number of rows is 1910 divisible by the blocksize 1911 */ 1912 mbs = M/bs; 1913 extra_rows = bs - M + bs*(mbs); 1914 if (extra_rows == bs) extra_rows = 0; 1915 else mbs++; 1916 if (extra_rows) { 1917 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1918 } 1919 1920 /* read in row lengths */ 1921 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1922 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1923 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1924 1925 /* read in column indices */ 1926 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1927 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1928 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1929 1930 /* loop over row lengths determining block row lengths */ 1931 ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 1932 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1933 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1934 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1935 masked = mask + mbs; 1936 rowcount = 0; nzcount = 0; 1937 for (i=0; i<mbs; i++) { 1938 nmask = 0; 1939 for (j=0; j<bs; j++) { 1940 kmax = rowlengths[rowcount]; 1941 for (k=0; k<kmax; k++) { 1942 tmp = jj[nzcount++]/bs; 1943 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1944 } 1945 rowcount++; 1946 } 1947 browlengths[i] += nmask; 1948 /* zero out the mask elements we set */ 1949 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1950 } 1951 1952 /* create our matrix */ 1953 ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B); 1954 ierr = MatSetType(B,type);CHKERRQ(ierr); 1955 ierr = MatSeqBAIJSetPreallocation(B,bs,0,browlengths);CHKERRQ(ierr); 1956 a = (Mat_SeqBAIJ*)B->data; 1957 1958 /* set matrix "i" values */ 1959 a->i[0] = 0; 1960 for (i=1; i<= mbs; i++) { 1961 a->i[i] = a->i[i-1] + browlengths[i-1]; 1962 a->ilen[i-1] = browlengths[i-1]; 1963 } 1964 a->nz = 0; 1965 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 1966 1967 /* read in nonzero values */ 1968 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1969 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1970 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1971 1972 /* set "a" and "j" values into matrix */ 1973 nzcount = 0; jcount = 0; 1974 for (i=0; i<mbs; i++) { 1975 nzcountb = nzcount; 1976 nmask = 0; 1977 for (j=0; j<bs; j++) { 1978 kmax = rowlengths[i*bs+j]; 1979 for (k=0; k<kmax; k++) { 1980 tmp = jj[nzcount++]/bs; 1981 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1982 } 1983 } 1984 /* sort the masked values */ 1985 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1986 1987 /* set "j" values into matrix */ 1988 maskcount = 1; 1989 for (j=0; j<nmask; j++) { 1990 a->j[jcount++] = masked[j]; 1991 mask[masked[j]] = maskcount++; 1992 } 1993 /* set "a" values into matrix */ 1994 ishift = bs2*a->i[i]; 1995 for (j=0; j<bs; j++) { 1996 kmax = rowlengths[i*bs+j]; 1997 for (k=0; k<kmax; k++) { 1998 tmp = jj[nzcountb]/bs ; 1999 block = mask[tmp] - 1; 2000 point = jj[nzcountb] - bs*tmp; 2001 idx = ishift + bs2*block + j + bs*point; 2002 a->a[idx] = (MatScalar)aa[nzcountb++]; 2003 } 2004 } 2005 /* zero out the mask elements we set */ 2006 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2007 } 2008 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2009 2010 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2011 ierr = PetscFree(browlengths);CHKERRQ(ierr); 2012 ierr = PetscFree(aa);CHKERRQ(ierr); 2013 ierr = PetscFree(jj);CHKERRQ(ierr); 2014 ierr = PetscFree(mask);CHKERRQ(ierr); 2015 2016 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2017 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2018 ierr = MatView_Private(B);CHKERRQ(ierr); 2019 2020 *A = B; 2021 PetscFunctionReturn(0); 2022 } 2023 2024 #undef __FUNCT__ 2025 #define __FUNCT__ "MatCreateSeqBAIJ" 2026 /*@C 2027 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2028 compressed row) format. For good matrix assembly performance the 2029 user should preallocate the matrix storage by setting the parameter nz 2030 (or the array nnz). By setting these parameters accurately, performance 2031 during matrix assembly can be increased by more than a factor of 50. 2032 2033 Collective on MPI_Comm 2034 2035 Input Parameters: 2036 + comm - MPI communicator, set to PETSC_COMM_SELF 2037 . bs - size of block 2038 . m - number of rows 2039 . n - number of columns 2040 . nz - number of nonzero blocks per block row (same for all rows) 2041 - nnz - array containing the number of nonzero blocks in the various block rows 2042 (possibly different for each block row) or PETSC_NULL 2043 2044 Output Parameter: 2045 . A - the matrix 2046 2047 Options Database Keys: 2048 . -mat_no_unroll - uses code that does not unroll the loops in the 2049 block calculations (much slower) 2050 . -mat_block_size - size of the blocks to use 2051 2052 Level: intermediate 2053 2054 Notes: 2055 A nonzero block is any block that as 1 or more nonzeros in it 2056 2057 The block AIJ format is fully compatible with standard Fortran 77 2058 storage. That is, the stored row and column indices can begin at 2059 either one (as in Fortran) or zero. See the users' manual for details. 2060 2061 Specify the preallocated storage with either nz or nnz (not both). 2062 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2063 allocation. For additional details, see the users manual chapter on 2064 matrices. 2065 2066 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2067 @*/ 2068 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A) 2069 { 2070 int ierr; 2071 2072 PetscFunctionBegin; 2073 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 2074 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2075 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 2076 PetscFunctionReturn(0); 2077 } 2078 2079 #undef __FUNCT__ 2080 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2081 /*@C 2082 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2083 per row in the matrix. For good matrix assembly performance the 2084 user should preallocate the matrix storage by setting the parameter nz 2085 (or the array nnz). By setting these parameters accurately, performance 2086 during matrix assembly can be increased by more than a factor of 50. 2087 2088 Collective on MPI_Comm 2089 2090 Input Parameters: 2091 + A - the matrix 2092 . bs - size of block 2093 . nz - number of block nonzeros per block row (same for all rows) 2094 - nnz - array containing the number of block nonzeros in the various block rows 2095 (possibly different for each block row) or PETSC_NULL 2096 2097 Options Database Keys: 2098 . -mat_no_unroll - uses code that does not unroll the loops in the 2099 block calculations (much slower) 2100 . -mat_block_size - size of the blocks to use 2101 2102 Level: intermediate 2103 2104 Notes: 2105 The block AIJ format is fully compatible with standard Fortran 77 2106 storage. That is, the stored row and column indices can begin at 2107 either one (as in Fortran) or zero. See the users' manual for details. 2108 2109 Specify the preallocated storage with either nz or nnz (not both). 2110 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2111 allocation. For additional details, see the users manual chapter on 2112 matrices. 2113 2114 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2115 @*/ 2116 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) 2117 { 2118 int ierr,(*f)(Mat,int,int,const int[]); 2119 2120 PetscFunctionBegin; 2121 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2122 if (f) { 2123 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2124 } 2125 PetscFunctionReturn(0); 2126 } 2127