1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 #include "petscsys.h" /*I "petscmat.h" I*/ 11 12 /* 13 Special version for Fun3d sequential benchmark 14 */ 15 #if defined(PETSC_HAVE_FORTRAN_CAPS) 16 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 17 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 18 #define matsetvaluesblocked4_ matsetvaluesblocked4 19 #endif 20 21 EXTERN_C_BEGIN 22 #undef __FUNCT__ 23 #define __FUNCT__ "matsetvaluesblocked4_" 24 void matsetvaluesblocked4_(Mat *AA,int *mm,const int im[],int *nn,const int in[],const PetscScalar v[]) 25 { 26 Mat A = *AA; 27 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 28 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 29 int *ai=a->i,*ailen=a->ilen; 30 int *aj=a->j,stepval; 31 const PetscScalar *value = v; 32 MatScalar *ap,*aa = a->a,*bap; 33 34 PetscFunctionBegin; 35 stepval = (n-1)*4; 36 for (k=0; k<m; k++) { /* loop over added rows */ 37 row = im[k]; 38 rp = aj + ai[row]; 39 ap = aa + 16*ai[row]; 40 nrow = ailen[row]; 41 low = 0; 42 for (l=0; l<n; l++) { /* loop over added columns */ 43 col = in[l]; 44 value = v + k*(stepval+4)*4 + l*4; 45 low = 0; high = nrow; 46 while (high-low > 7) { 47 t = (low+high)/2; 48 if (rp[t] > col) high = t; 49 else low = t; 50 } 51 for (i=low; i<high; i++) { 52 if (rp[i] > col) break; 53 if (rp[i] == col) { 54 bap = ap + 16*i; 55 for (ii=0; ii<4; ii++,value+=stepval) { 56 for (jj=ii; jj<16; jj+=4) { 57 bap[jj] += *value++; 58 } 59 } 60 goto noinsert2; 61 } 62 } 63 N = nrow++ - 1; 64 /* shift up all the later entries in this row */ 65 for (ii=N; ii>=i; ii--) { 66 rp[ii+1] = rp[ii]; 67 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 68 } 69 if (N >= i) { 70 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 71 } 72 rp[i] = col; 73 bap = ap + 16*i; 74 for (ii=0; ii<4; ii++,value+=stepval) { 75 for (jj=ii; jj<16; jj+=4) { 76 bap[jj] = *value++; 77 } 78 } 79 noinsert2:; 80 low = i; 81 } 82 ailen[row] = nrow; 83 } 84 } 85 EXTERN_C_END 86 87 #if defined(PETSC_HAVE_FORTRAN_CAPS) 88 #define matsetvalues4_ MATSETVALUES4 89 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 90 #define matsetvalues4_ matsetvalues4 91 #endif 92 93 EXTERN_C_BEGIN 94 #undef __FUNCT__ 95 #define __FUNCT__ "MatSetValues4_" 96 void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v) 97 { 98 Mat A = *AA; 99 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 100 int *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 101 int *ai=a->i,*ailen=a->ilen; 102 int *aj=a->j,brow,bcol; 103 int ridx,cidx; 104 MatScalar *ap,value,*aa=a->a,*bap; 105 106 PetscFunctionBegin; 107 for (k=0; k<m; k++) { /* loop over added rows */ 108 row = im[k]; brow = row/4; 109 rp = aj + ai[brow]; 110 ap = aa + 16*ai[brow]; 111 nrow = ailen[brow]; 112 low = 0; 113 for (l=0; l<n; l++) { /* loop over added columns */ 114 col = in[l]; bcol = col/4; 115 ridx = row % 4; cidx = col % 4; 116 value = v[l + k*n]; 117 low = 0; high = nrow; 118 while (high-low > 7) { 119 t = (low+high)/2; 120 if (rp[t] > bcol) high = t; 121 else low = t; 122 } 123 for (i=low; i<high; i++) { 124 if (rp[i] > bcol) break; 125 if (rp[i] == bcol) { 126 bap = ap + 16*i + 4*cidx + ridx; 127 *bap += value; 128 goto noinsert1; 129 } 130 } 131 N = nrow++ - 1; 132 /* shift up all the later entries in this row */ 133 for (ii=N; ii>=i; ii--) { 134 rp[ii+1] = rp[ii]; 135 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 136 } 137 if (N>=i) { 138 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 139 } 140 rp[i] = bcol; 141 ap[16*i + 4*cidx + ridx] = value; 142 noinsert1:; 143 low = i; 144 } 145 ailen[brow] = nrow; 146 } 147 } 148 EXTERN_C_END 149 150 /* UGLY, ugly, ugly 151 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 152 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 153 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 154 converts the entries into single precision and then calls ..._MatScalar() to put them 155 into the single precision data structures. 156 */ 157 #if defined(PETSC_USE_MAT_SINGLE) 158 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 159 #else 160 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 161 #endif 162 163 #define CHUNKSIZE 10 164 165 /* 166 Checks for missing diagonals 167 */ 168 #undef __FUNCT__ 169 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 170 int MatMissingDiagonal_SeqBAIJ(Mat A) 171 { 172 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 173 int *diag,*jj = a->j,i,ierr; 174 175 PetscFunctionBegin; 176 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 177 diag = a->diag; 178 for (i=0; i<a->mbs; i++) { 179 if (jj[diag[i]] != i) { 180 SETERRQ1(1,"Matrix is missing diagonal number %d",i); 181 } 182 } 183 PetscFunctionReturn(0); 184 } 185 186 #undef __FUNCT__ 187 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 188 int MatMarkDiagonal_SeqBAIJ(Mat A) 189 { 190 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 191 int i,j,*diag,m = a->mbs,ierr; 192 193 PetscFunctionBegin; 194 if (a->diag) PetscFunctionReturn(0); 195 196 ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 197 PetscLogObjectMemory(A,(m+1)*sizeof(int)); 198 for (i=0; i<m; i++) { 199 diag[i] = a->i[i+1]; 200 for (j=a->i[i]; j<a->i[i+1]; j++) { 201 if (a->j[j] == i) { 202 diag[i] = j; 203 break; 204 } 205 } 206 } 207 a->diag = diag; 208 PetscFunctionReturn(0); 209 } 210 211 212 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 213 214 #undef __FUNCT__ 215 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 216 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 217 { 218 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 219 int ierr,n = a->mbs,i; 220 221 PetscFunctionBegin; 222 *nn = n; 223 if (!ia) PetscFunctionReturn(0); 224 if (symmetric) { 225 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 226 } else if (oshift == 1) { 227 /* temporarily add 1 to i and j indices */ 228 int nz = a->i[n]; 229 for (i=0; i<nz; i++) a->j[i]++; 230 for (i=0; i<n+1; i++) a->i[i]++; 231 *ia = a->i; *ja = a->j; 232 } else { 233 *ia = a->i; *ja = a->j; 234 } 235 236 PetscFunctionReturn(0); 237 } 238 239 #undef __FUNCT__ 240 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 241 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 242 { 243 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 244 int i,n = a->mbs,ierr; 245 246 PetscFunctionBegin; 247 if (!ia) PetscFunctionReturn(0); 248 if (symmetric) { 249 ierr = PetscFree(*ia);CHKERRQ(ierr); 250 ierr = PetscFree(*ja);CHKERRQ(ierr); 251 } else if (oshift == 1) { 252 int nz = a->i[n]-1; 253 for (i=0; i<nz; i++) a->j[i]--; 254 for (i=0; i<n+1; i++) a->i[i]--; 255 } 256 PetscFunctionReturn(0); 257 } 258 259 #undef __FUNCT__ 260 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 261 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 262 { 263 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 264 265 PetscFunctionBegin; 266 *bs = baij->bs; 267 PetscFunctionReturn(0); 268 } 269 270 271 #undef __FUNCT__ 272 #define __FUNCT__ "MatDestroy_SeqBAIJ" 273 int MatDestroy_SeqBAIJ(Mat A) 274 { 275 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 276 int ierr; 277 278 PetscFunctionBegin; 279 #if defined(PETSC_USE_LOG) 280 PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 281 #endif 282 ierr = PetscFree(a->a);CHKERRQ(ierr); 283 if (!a->singlemalloc) { 284 ierr = PetscFree(a->i);CHKERRQ(ierr); 285 ierr = PetscFree(a->j);CHKERRQ(ierr); 286 } 287 if (a->row) { 288 ierr = ISDestroy(a->row);CHKERRQ(ierr); 289 } 290 if (a->col) { 291 ierr = ISDestroy(a->col);CHKERRQ(ierr); 292 } 293 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 294 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 295 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 296 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 297 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 298 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 299 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 300 #if defined(PETSC_USE_MAT_SINGLE) 301 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 302 #endif 303 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 304 305 ierr = PetscFree(a);CHKERRQ(ierr); 306 PetscFunctionReturn(0); 307 } 308 309 #undef __FUNCT__ 310 #define __FUNCT__ "MatSetOption_SeqBAIJ" 311 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 312 { 313 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 314 315 PetscFunctionBegin; 316 switch (op) { 317 case MAT_ROW_ORIENTED: 318 a->roworiented = PETSC_TRUE; 319 break; 320 case MAT_COLUMN_ORIENTED: 321 a->roworiented = PETSC_FALSE; 322 break; 323 case MAT_COLUMNS_SORTED: 324 a->sorted = PETSC_TRUE; 325 break; 326 case MAT_COLUMNS_UNSORTED: 327 a->sorted = PETSC_FALSE; 328 break; 329 case MAT_KEEP_ZEROED_ROWS: 330 a->keepzeroedrows = PETSC_TRUE; 331 break; 332 case MAT_NO_NEW_NONZERO_LOCATIONS: 333 a->nonew = 1; 334 break; 335 case MAT_NEW_NONZERO_LOCATION_ERR: 336 a->nonew = -1; 337 break; 338 case MAT_NEW_NONZERO_ALLOCATION_ERR: 339 a->nonew = -2; 340 break; 341 case MAT_YES_NEW_NONZERO_LOCATIONS: 342 a->nonew = 0; 343 break; 344 case MAT_ROWS_SORTED: 345 case MAT_ROWS_UNSORTED: 346 case MAT_YES_NEW_DIAGONALS: 347 case MAT_IGNORE_OFF_PROC_ENTRIES: 348 case MAT_USE_HASH_TABLE: 349 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 350 break; 351 case MAT_NO_NEW_DIAGONALS: 352 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 353 default: 354 SETERRQ(PETSC_ERR_SUP,"unknown option"); 355 } 356 PetscFunctionReturn(0); 357 } 358 359 #undef __FUNCT__ 360 #define __FUNCT__ "MatGetRow_SeqBAIJ" 361 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 362 { 363 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 364 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 365 MatScalar *aa,*aa_i; 366 PetscScalar *v_i; 367 368 PetscFunctionBegin; 369 bs = a->bs; 370 ai = a->i; 371 aj = a->j; 372 aa = a->a; 373 bs2 = a->bs2; 374 375 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 376 377 bn = row/bs; /* Block number */ 378 bp = row % bs; /* Block Position */ 379 M = ai[bn+1] - ai[bn]; 380 *nz = bs*M; 381 382 if (v) { 383 *v = 0; 384 if (*nz) { 385 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 386 for (i=0; i<M; i++) { /* for each block in the block row */ 387 v_i = *v + i*bs; 388 aa_i = aa + bs2*(ai[bn] + i); 389 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 390 } 391 } 392 } 393 394 if (idx) { 395 *idx = 0; 396 if (*nz) { 397 ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 398 for (i=0; i<M; i++) { /* for each block in the block row */ 399 idx_i = *idx + i*bs; 400 itmp = bs*aj[ai[bn] + i]; 401 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 402 } 403 } 404 } 405 PetscFunctionReturn(0); 406 } 407 408 #undef __FUNCT__ 409 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 410 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 411 { 412 int ierr; 413 414 PetscFunctionBegin; 415 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 416 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 417 PetscFunctionReturn(0); 418 } 419 420 #undef __FUNCT__ 421 #define __FUNCT__ "MatTranspose_SeqBAIJ" 422 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 423 { 424 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 425 Mat C; 426 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 427 int *rows,*cols,bs2=a->bs2; 428 PetscScalar *array; 429 430 PetscFunctionBegin; 431 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 432 ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 433 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 434 435 #if defined(PETSC_USE_MAT_SINGLE) 436 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 437 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 438 #else 439 array = a->a; 440 #endif 441 442 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 443 ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 444 ierr = PetscFree(col);CHKERRQ(ierr); 445 ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 446 cols = rows + bs; 447 for (i=0; i<mbs; i++) { 448 cols[0] = i*bs; 449 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 450 len = ai[i+1] - ai[i]; 451 for (j=0; j<len; j++) { 452 rows[0] = (*aj++)*bs; 453 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 454 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 455 array += bs2; 456 } 457 } 458 ierr = PetscFree(rows);CHKERRQ(ierr); 459 #if defined(PETSC_USE_MAT_SINGLE) 460 ierr = PetscFree(array); 461 #endif 462 463 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 464 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 465 466 if (B) { 467 *B = C; 468 } else { 469 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 470 } 471 PetscFunctionReturn(0); 472 } 473 474 #undef __FUNCT__ 475 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 476 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 477 { 478 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 479 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 480 PetscScalar *aa; 481 FILE *file; 482 483 PetscFunctionBegin; 484 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 485 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 486 col_lens[0] = MAT_FILE_COOKIE; 487 488 col_lens[1] = A->m; 489 col_lens[2] = A->n; 490 col_lens[3] = a->nz*bs2; 491 492 /* store lengths of each row and write (including header) to file */ 493 count = 0; 494 for (i=0; i<a->mbs; i++) { 495 for (j=0; j<bs; j++) { 496 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 497 } 498 } 499 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 500 ierr = PetscFree(col_lens);CHKERRQ(ierr); 501 502 /* store column indices (zero start index) */ 503 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 504 count = 0; 505 for (i=0; i<a->mbs; i++) { 506 for (j=0; j<bs; j++) { 507 for (k=a->i[i]; k<a->i[i+1]; k++) { 508 for (l=0; l<bs; l++) { 509 jj[count++] = bs*a->j[k] + l; 510 } 511 } 512 } 513 } 514 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 515 ierr = PetscFree(jj);CHKERRQ(ierr); 516 517 /* store nonzero values */ 518 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 519 count = 0; 520 for (i=0; i<a->mbs; i++) { 521 for (j=0; j<bs; j++) { 522 for (k=a->i[i]; k<a->i[i+1]; k++) { 523 for (l=0; l<bs; l++) { 524 aa[count++] = a->a[bs2*k + l*bs + j]; 525 } 526 } 527 } 528 } 529 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 530 ierr = PetscFree(aa);CHKERRQ(ierr); 531 532 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 533 if (file) { 534 fprintf(file,"-matload_block_size %d\n",a->bs); 535 } 536 PetscFunctionReturn(0); 537 } 538 539 #undef __FUNCT__ 540 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 541 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 542 { 543 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 544 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 545 PetscViewerFormat format; 546 547 PetscFunctionBegin; 548 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 549 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 550 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 551 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 552 Mat aij; 553 ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr); 554 ierr = MatView(aij,viewer);CHKERRQ(ierr); 555 ierr = MatDestroy(aij);CHKERRQ(ierr); 556 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 557 PetscFunctionReturn(0); 558 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 559 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 560 for (i=0; i<a->mbs; i++) { 561 for (j=0; j<bs; j++) { 562 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 563 for (k=a->i[i]; k<a->i[i+1]; k++) { 564 for (l=0; l<bs; l++) { 565 #if defined(PETSC_USE_COMPLEX) 566 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 567 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l, 568 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 569 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 570 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l, 571 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 572 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 573 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 574 } 575 #else 576 if (a->a[bs2*k + l*bs + j] != 0.0) { 577 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 578 } 579 #endif 580 } 581 } 582 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 583 } 584 } 585 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 586 } else { 587 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 588 for (i=0; i<a->mbs; i++) { 589 for (j=0; j<bs; j++) { 590 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 591 for (k=a->i[i]; k<a->i[i+1]; k++) { 592 for (l=0; l<bs; l++) { 593 #if defined(PETSC_USE_COMPLEX) 594 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 595 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 596 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 597 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 598 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 599 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 600 } else { 601 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 602 } 603 #else 604 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 605 #endif 606 } 607 } 608 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 609 } 610 } 611 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 612 } 613 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 614 PetscFunctionReturn(0); 615 } 616 617 #undef __FUNCT__ 618 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 619 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 620 { 621 Mat A = (Mat) Aa; 622 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 623 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 624 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 625 MatScalar *aa; 626 PetscViewer viewer; 627 628 PetscFunctionBegin; 629 630 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 631 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 632 633 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 634 635 /* loop over matrix elements drawing boxes */ 636 color = PETSC_DRAW_BLUE; 637 for (i=0,row=0; i<mbs; i++,row+=bs) { 638 for (j=a->i[i]; j<a->i[i+1]; j++) { 639 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 640 x_l = a->j[j]*bs; x_r = x_l + 1.0; 641 aa = a->a + j*bs2; 642 for (k=0; k<bs; k++) { 643 for (l=0; l<bs; l++) { 644 if (PetscRealPart(*aa++) >= 0.) continue; 645 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 646 } 647 } 648 } 649 } 650 color = PETSC_DRAW_CYAN; 651 for (i=0,row=0; i<mbs; i++,row+=bs) { 652 for (j=a->i[i]; j<a->i[i+1]; j++) { 653 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 654 x_l = a->j[j]*bs; x_r = x_l + 1.0; 655 aa = a->a + j*bs2; 656 for (k=0; k<bs; k++) { 657 for (l=0; l<bs; l++) { 658 if (PetscRealPart(*aa++) != 0.) continue; 659 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 660 } 661 } 662 } 663 } 664 665 color = PETSC_DRAW_RED; 666 for (i=0,row=0; i<mbs; i++,row+=bs) { 667 for (j=a->i[i]; j<a->i[i+1]; j++) { 668 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 669 x_l = a->j[j]*bs; x_r = x_l + 1.0; 670 aa = a->a + j*bs2; 671 for (k=0; k<bs; k++) { 672 for (l=0; l<bs; l++) { 673 if (PetscRealPart(*aa++) <= 0.) continue; 674 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 675 } 676 } 677 } 678 } 679 PetscFunctionReturn(0); 680 } 681 682 #undef __FUNCT__ 683 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 684 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 685 { 686 int ierr; 687 PetscReal xl,yl,xr,yr,w,h; 688 PetscDraw draw; 689 PetscTruth isnull; 690 691 PetscFunctionBegin; 692 693 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 694 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 695 696 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 697 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 698 xr += w; yr += h; xl = -w; yl = -h; 699 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 700 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 701 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 702 PetscFunctionReturn(0); 703 } 704 705 #undef __FUNCT__ 706 #define __FUNCT__ "MatView_SeqBAIJ" 707 int MatView_SeqBAIJ(Mat A,PetscViewer viewer) 708 { 709 int ierr; 710 PetscTruth issocket,isascii,isbinary,isdraw; 711 712 PetscFunctionBegin; 713 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 714 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 715 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 716 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 717 if (issocket) { 718 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 719 } else if (isascii){ 720 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 721 } else if (isbinary) { 722 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 723 } else if (isdraw) { 724 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 725 } else { 726 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 727 } 728 PetscFunctionReturn(0); 729 } 730 731 732 #undef __FUNCT__ 733 #define __FUNCT__ "MatGetValues_SeqBAIJ" 734 int MatGetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[]) 735 { 736 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 737 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 738 int *ai = a->i,*ailen = a->ilen; 739 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 740 MatScalar *ap,*aa = a->a,zero = 0.0; 741 742 PetscFunctionBegin; 743 for (k=0; k<m; k++) { /* loop over rows */ 744 row = im[k]; brow = row/bs; 745 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 746 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 747 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 748 nrow = ailen[brow]; 749 for (l=0; l<n; l++) { /* loop over columns */ 750 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 751 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 752 col = in[l] ; 753 bcol = col/bs; 754 cidx = col%bs; 755 ridx = row%bs; 756 high = nrow; 757 low = 0; /* assume unsorted */ 758 while (high-low > 5) { 759 t = (low+high)/2; 760 if (rp[t] > bcol) high = t; 761 else low = t; 762 } 763 for (i=low; i<high; i++) { 764 if (rp[i] > bcol) break; 765 if (rp[i] == bcol) { 766 *v++ = ap[bs2*i+bs*cidx+ridx]; 767 goto finished; 768 } 769 } 770 *v++ = zero; 771 finished:; 772 } 773 } 774 PetscFunctionReturn(0); 775 } 776 777 #if defined(PETSC_USE_MAT_SINGLE) 778 #undef __FUNCT__ 779 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 780 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 781 { 782 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 783 int ierr,i,N = m*n*b->bs2; 784 MatScalar *vsingle; 785 786 PetscFunctionBegin; 787 if (N > b->setvalueslen) { 788 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 789 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 790 b->setvalueslen = N; 791 } 792 vsingle = b->setvaluescopy; 793 for (i=0; i<N; i++) { 794 vsingle[i] = v[i]; 795 } 796 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 797 PetscFunctionReturn(0); 798 } 799 #endif 800 801 802 #undef __FUNCT__ 803 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 804 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode is) 805 { 806 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 807 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 808 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 809 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 810 PetscTruth roworiented=a->roworiented; 811 const MatScalar *value = v; 812 MatScalar *ap,*aa = a->a,*bap; 813 814 PetscFunctionBegin; 815 if (roworiented) { 816 stepval = (n-1)*bs; 817 } else { 818 stepval = (m-1)*bs; 819 } 820 for (k=0; k<m; k++) { /* loop over added rows */ 821 row = im[k]; 822 if (row < 0) continue; 823 #if defined(PETSC_USE_BOPT_g) 824 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1); 825 #endif 826 rp = aj + ai[row]; 827 ap = aa + bs2*ai[row]; 828 rmax = imax[row]; 829 nrow = ailen[row]; 830 low = 0; 831 for (l=0; l<n; l++) { /* loop over added columns */ 832 if (in[l] < 0) continue; 833 #if defined(PETSC_USE_BOPT_g) 834 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],a->nbs-1); 835 #endif 836 col = in[l]; 837 if (roworiented) { 838 value = v + k*(stepval+bs)*bs + l*bs; 839 } else { 840 value = v + l*(stepval+bs)*bs + k*bs; 841 } 842 if (!sorted) low = 0; high = nrow; 843 while (high-low > 7) { 844 t = (low+high)/2; 845 if (rp[t] > col) high = t; 846 else low = t; 847 } 848 for (i=low; i<high; i++) { 849 if (rp[i] > col) break; 850 if (rp[i] == col) { 851 bap = ap + bs2*i; 852 if (roworiented) { 853 if (is == ADD_VALUES) { 854 for (ii=0; ii<bs; ii++,value+=stepval) { 855 for (jj=ii; jj<bs2; jj+=bs) { 856 bap[jj] += *value++; 857 } 858 } 859 } else { 860 for (ii=0; ii<bs; ii++,value+=stepval) { 861 for (jj=ii; jj<bs2; jj+=bs) { 862 bap[jj] = *value++; 863 } 864 } 865 } 866 } else { 867 if (is == ADD_VALUES) { 868 for (ii=0; ii<bs; ii++,value+=stepval) { 869 for (jj=0; jj<bs; jj++) { 870 *bap++ += *value++; 871 } 872 } 873 } else { 874 for (ii=0; ii<bs; ii++,value+=stepval) { 875 for (jj=0; jj<bs; jj++) { 876 *bap++ = *value++; 877 } 878 } 879 } 880 } 881 goto noinsert2; 882 } 883 } 884 if (nonew == 1) goto noinsert2; 885 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 886 if (nrow >= rmax) { 887 /* there is no extra room in row, therefore enlarge */ 888 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 889 MatScalar *new_a; 890 891 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 892 893 /* malloc new storage space */ 894 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 895 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 896 new_j = (int*)(new_a + bs2*new_nz); 897 new_i = new_j + new_nz; 898 899 /* copy over old data into new slots */ 900 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 901 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 902 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 903 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 904 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 905 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 906 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 907 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 908 /* free up old matrix storage */ 909 ierr = PetscFree(a->a);CHKERRQ(ierr); 910 if (!a->singlemalloc) { 911 ierr = PetscFree(a->i);CHKERRQ(ierr); 912 ierr = PetscFree(a->j);CHKERRQ(ierr); 913 } 914 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 915 a->singlemalloc = PETSC_TRUE; 916 917 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 918 rmax = imax[row] = imax[row] + CHUNKSIZE; 919 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 920 a->maxnz += bs2*CHUNKSIZE; 921 a->reallocs++; 922 a->nz++; 923 } 924 N = nrow++ - 1; 925 /* shift up all the later entries in this row */ 926 for (ii=N; ii>=i; ii--) { 927 rp[ii+1] = rp[ii]; 928 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 929 } 930 if (N >= i) { 931 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 932 } 933 rp[i] = col; 934 bap = ap + bs2*i; 935 if (roworiented) { 936 for (ii=0; ii<bs; ii++,value+=stepval) { 937 for (jj=ii; jj<bs2; jj+=bs) { 938 bap[jj] = *value++; 939 } 940 } 941 } else { 942 for (ii=0; ii<bs; ii++,value+=stepval) { 943 for (jj=0; jj<bs; jj++) { 944 *bap++ = *value++; 945 } 946 } 947 } 948 noinsert2:; 949 low = i; 950 } 951 ailen[row] = nrow; 952 } 953 PetscFunctionReturn(0); 954 } 955 956 #undef __FUNCT__ 957 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 958 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 959 { 960 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 961 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 962 int m = A->m,*ip,N,*ailen = a->ilen; 963 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 964 MatScalar *aa = a->a,*ap; 965 966 PetscFunctionBegin; 967 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 968 969 if (m) rmax = ailen[0]; 970 for (i=1; i<mbs; i++) { 971 /* move each row back by the amount of empty slots (fshift) before it*/ 972 fshift += imax[i-1] - ailen[i-1]; 973 rmax = PetscMax(rmax,ailen[i]); 974 if (fshift) { 975 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 976 N = ailen[i]; 977 for (j=0; j<N; j++) { 978 ip[j-fshift] = ip[j]; 979 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 980 } 981 } 982 ai[i] = ai[i-1] + ailen[i-1]; 983 } 984 if (mbs) { 985 fshift += imax[mbs-1] - ailen[mbs-1]; 986 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 987 } 988 /* reset ilen and imax for each row */ 989 for (i=0; i<mbs; i++) { 990 ailen[i] = imax[i] = ai[i+1] - ai[i]; 991 } 992 a->nz = ai[mbs]; 993 994 /* diagonals may have moved, so kill the diagonal pointers */ 995 if (fshift && a->diag) { 996 ierr = PetscFree(a->diag);CHKERRQ(ierr); 997 PetscLogObjectMemory(A,-(mbs+1)*sizeof(int)); 998 a->diag = 0; 999 } 1000 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2); 1001 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs); 1002 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 1003 a->reallocs = 0; 1004 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1005 1006 PetscFunctionReturn(0); 1007 } 1008 1009 1010 1011 /* 1012 This function returns an array of flags which indicate the locations of contiguous 1013 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1014 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1015 Assume: sizes should be long enough to hold all the values. 1016 */ 1017 #undef __FUNCT__ 1018 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1019 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 1020 { 1021 int i,j,k,row; 1022 PetscTruth flg; 1023 1024 PetscFunctionBegin; 1025 for (i=0,j=0; i<n; j++) { 1026 row = idx[i]; 1027 if (row%bs!=0) { /* Not the begining of a block */ 1028 sizes[j] = 1; 1029 i++; 1030 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1031 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1032 i++; 1033 } else { /* Begining of the block, so check if the complete block exists */ 1034 flg = PETSC_TRUE; 1035 for (k=1; k<bs; k++) { 1036 if (row+k != idx[i+k]) { /* break in the block */ 1037 flg = PETSC_FALSE; 1038 break; 1039 } 1040 } 1041 if (flg == PETSC_TRUE) { /* No break in the bs */ 1042 sizes[j] = bs; 1043 i+= bs; 1044 } else { 1045 sizes[j] = 1; 1046 i++; 1047 } 1048 } 1049 } 1050 *bs_max = j; 1051 PetscFunctionReturn(0); 1052 } 1053 1054 #undef __FUNCT__ 1055 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1056 int MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag) 1057 { 1058 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1059 int ierr,i,j,k,count,is_n,*is_idx,*rows; 1060 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 1061 PetscScalar zero = 0.0; 1062 MatScalar *aa; 1063 1064 PetscFunctionBegin; 1065 /* Make a copy of the IS and sort it */ 1066 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 1067 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1068 1069 /* allocate memory for rows,sizes */ 1070 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 1071 sizes = rows + is_n; 1072 1073 /* copy IS values to rows, and sort them */ 1074 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1075 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1076 if (baij->keepzeroedrows) { 1077 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1078 bs_max = is_n; 1079 } else { 1080 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1081 } 1082 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 1083 1084 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1085 row = rows[j]; 1086 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 1087 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1088 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1089 if (sizes[i] == bs && !baij->keepzeroedrows) { 1090 if (diag) { 1091 if (baij->ilen[row/bs] > 0) { 1092 baij->ilen[row/bs] = 1; 1093 baij->j[baij->i[row/bs]] = row/bs; 1094 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1095 } 1096 /* Now insert all the diagonal values for this bs */ 1097 for (k=0; k<bs; k++) { 1098 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 1099 } 1100 } else { /* (!diag) */ 1101 baij->ilen[row/bs] = 0; 1102 } /* end (!diag) */ 1103 } else { /* (sizes[i] != bs) */ 1104 #if defined (PETSC_USE_DEBUG) 1105 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 1106 #endif 1107 for (k=0; k<count; k++) { 1108 aa[0] = zero; 1109 aa += bs; 1110 } 1111 if (diag) { 1112 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1113 } 1114 } 1115 } 1116 1117 ierr = PetscFree(rows);CHKERRQ(ierr); 1118 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1119 PetscFunctionReturn(0); 1120 } 1121 1122 #undef __FUNCT__ 1123 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1124 int MatSetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 1125 { 1126 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1127 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1128 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1129 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1130 int ridx,cidx,bs2=a->bs2,ierr; 1131 PetscTruth roworiented=a->roworiented; 1132 MatScalar *ap,value,*aa=a->a,*bap; 1133 1134 PetscFunctionBegin; 1135 for (k=0; k<m; k++) { /* loop over added rows */ 1136 row = im[k]; brow = row/bs; 1137 if (row < 0) continue; 1138 #if defined(PETSC_USE_BOPT_g) 1139 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1); 1140 #endif 1141 rp = aj + ai[brow]; 1142 ap = aa + bs2*ai[brow]; 1143 rmax = imax[brow]; 1144 nrow = ailen[brow]; 1145 low = 0; 1146 for (l=0; l<n; l++) { /* loop over added columns */ 1147 if (in[l] < 0) continue; 1148 #if defined(PETSC_USE_BOPT_g) 1149 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1); 1150 #endif 1151 col = in[l]; bcol = col/bs; 1152 ridx = row % bs; cidx = col % bs; 1153 if (roworiented) { 1154 value = v[l + k*n]; 1155 } else { 1156 value = v[k + l*m]; 1157 } 1158 if (!sorted) low = 0; high = nrow; 1159 while (high-low > 7) { 1160 t = (low+high)/2; 1161 if (rp[t] > bcol) high = t; 1162 else low = t; 1163 } 1164 for (i=low; i<high; i++) { 1165 if (rp[i] > bcol) break; 1166 if (rp[i] == bcol) { 1167 bap = ap + bs2*i + bs*cidx + ridx; 1168 if (is == ADD_VALUES) *bap += value; 1169 else *bap = value; 1170 goto noinsert1; 1171 } 1172 } 1173 if (nonew == 1) goto noinsert1; 1174 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1175 if (nrow >= rmax) { 1176 /* there is no extra room in row, therefore enlarge */ 1177 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1178 MatScalar *new_a; 1179 1180 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1181 1182 /* Malloc new storage space */ 1183 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1184 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1185 new_j = (int*)(new_a + bs2*new_nz); 1186 new_i = new_j + new_nz; 1187 1188 /* copy over old data into new slots */ 1189 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1190 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1191 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1192 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1193 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1194 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1195 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1196 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1197 /* free up old matrix storage */ 1198 ierr = PetscFree(a->a);CHKERRQ(ierr); 1199 if (!a->singlemalloc) { 1200 ierr = PetscFree(a->i);CHKERRQ(ierr); 1201 ierr = PetscFree(a->j);CHKERRQ(ierr); 1202 } 1203 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1204 a->singlemalloc = PETSC_TRUE; 1205 1206 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1207 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1208 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1209 a->maxnz += bs2*CHUNKSIZE; 1210 a->reallocs++; 1211 a->nz++; 1212 } 1213 N = nrow++ - 1; 1214 /* shift up all the later entries in this row */ 1215 for (ii=N; ii>=i; ii--) { 1216 rp[ii+1] = rp[ii]; 1217 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1218 } 1219 if (N>=i) { 1220 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1221 } 1222 rp[i] = bcol; 1223 ap[bs2*i + bs*cidx + ridx] = value; 1224 noinsert1:; 1225 low = i; 1226 } 1227 ailen[brow] = nrow; 1228 } 1229 PetscFunctionReturn(0); 1230 } 1231 1232 1233 #undef __FUNCT__ 1234 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1235 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1236 { 1237 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1238 Mat outA; 1239 int ierr; 1240 PetscTruth row_identity,col_identity; 1241 1242 PetscFunctionBegin; 1243 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1244 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1245 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1246 if (!row_identity || !col_identity) { 1247 SETERRQ(1,"Row and column permutations must be identity for in-place ILU"); 1248 } 1249 1250 outA = inA; 1251 inA->factor = FACTOR_LU; 1252 1253 if (!a->diag) { 1254 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1255 } 1256 1257 a->row = row; 1258 a->col = col; 1259 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1260 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1261 1262 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1263 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1264 PetscLogObjectParent(inA,a->icol); 1265 1266 /* 1267 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1268 for ILU(0) factorization with natural ordering 1269 */ 1270 if (a->bs < 8) { 1271 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1272 } else { 1273 if (!a->solve_work) { 1274 ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1275 PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar)); 1276 } 1277 } 1278 1279 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1280 1281 PetscFunctionReturn(0); 1282 } 1283 #undef __FUNCT__ 1284 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1285 int MatPrintHelp_SeqBAIJ(Mat A) 1286 { 1287 static PetscTruth called = PETSC_FALSE; 1288 MPI_Comm comm = A->comm; 1289 int ierr; 1290 1291 PetscFunctionBegin; 1292 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1293 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1294 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1295 PetscFunctionReturn(0); 1296 } 1297 1298 EXTERN_C_BEGIN 1299 #undef __FUNCT__ 1300 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1301 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1302 { 1303 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1304 int i,nz,nbs; 1305 1306 PetscFunctionBegin; 1307 nz = baij->maxnz/baij->bs2; 1308 nbs = baij->nbs; 1309 for (i=0; i<nz; i++) { 1310 baij->j[i] = indices[i]; 1311 } 1312 baij->nz = nz; 1313 for (i=0; i<nbs; i++) { 1314 baij->ilen[i] = baij->imax[i]; 1315 } 1316 1317 PetscFunctionReturn(0); 1318 } 1319 EXTERN_C_END 1320 1321 #undef __FUNCT__ 1322 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1323 /*@ 1324 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1325 in the matrix. 1326 1327 Input Parameters: 1328 + mat - the SeqBAIJ matrix 1329 - indices - the column indices 1330 1331 Level: advanced 1332 1333 Notes: 1334 This can be called if you have precomputed the nonzero structure of the 1335 matrix and want to provide it to the matrix object to improve the performance 1336 of the MatSetValues() operation. 1337 1338 You MUST have set the correct numbers of nonzeros per row in the call to 1339 MatCreateSeqBAIJ(). 1340 1341 MUST be called before any calls to MatSetValues(); 1342 1343 @*/ 1344 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1345 { 1346 int ierr,(*f)(Mat,int *); 1347 1348 PetscFunctionBegin; 1349 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1350 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1351 if (f) { 1352 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1353 } else { 1354 SETERRQ(1,"Wrong type of matrix to set column indices"); 1355 } 1356 PetscFunctionReturn(0); 1357 } 1358 1359 #undef __FUNCT__ 1360 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1361 int MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1362 { 1363 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1364 int ierr,i,j,n,row,bs,*ai,*aj,mbs; 1365 PetscReal atmp; 1366 PetscScalar *x,zero = 0.0; 1367 MatScalar *aa; 1368 int ncols,brow,krow,kcol; 1369 1370 PetscFunctionBegin; 1371 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1372 bs = a->bs; 1373 aa = a->a; 1374 ai = a->i; 1375 aj = a->j; 1376 mbs = a->mbs; 1377 1378 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1379 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1380 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1381 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1382 for (i=0; i<mbs; i++) { 1383 ncols = ai[1] - ai[0]; ai++; 1384 brow = bs*i; 1385 for (j=0; j<ncols; j++){ 1386 /* bcol = bs*(*aj); */ 1387 for (kcol=0; kcol<bs; kcol++){ 1388 for (krow=0; krow<bs; krow++){ 1389 atmp = PetscAbsScalar(*aa); aa++; 1390 row = brow + krow; /* row index */ 1391 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1392 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1393 } 1394 } 1395 aj++; 1396 } 1397 } 1398 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1399 PetscFunctionReturn(0); 1400 } 1401 1402 #undef __FUNCT__ 1403 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1404 int MatSetUpPreallocation_SeqBAIJ(Mat A) 1405 { 1406 int ierr; 1407 1408 PetscFunctionBegin; 1409 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1410 PetscFunctionReturn(0); 1411 } 1412 1413 #undef __FUNCT__ 1414 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1415 int MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1416 { 1417 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1418 PetscFunctionBegin; 1419 *array = a->a; 1420 PetscFunctionReturn(0); 1421 } 1422 1423 #undef __FUNCT__ 1424 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1425 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1426 { 1427 PetscFunctionBegin; 1428 PetscFunctionReturn(0); 1429 } 1430 1431 #include "petscblaslapack.h" 1432 #undef __FUNCT__ 1433 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1434 int MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1435 { 1436 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1437 int ierr,one=1,i,bs=y->bs,j,bs2; 1438 1439 PetscFunctionBegin; 1440 if (str == SAME_NONZERO_PATTERN) { 1441 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1442 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1443 if (y->xtoy && y->XtoY != X) { 1444 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1445 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1446 } 1447 if (!y->xtoy) { /* get xtoy */ 1448 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1449 y->XtoY = X; 1450 } 1451 bs2 = bs*bs; 1452 for (i=0; i<x->nz; i++) { 1453 j = 0; 1454 while (j < bs2){ 1455 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1456 j++; 1457 } 1458 } 1459 PetscLogInfo(0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)); 1460 } else { 1461 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1462 } 1463 PetscFunctionReturn(0); 1464 } 1465 1466 /* -------------------------------------------------------------------*/ 1467 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1468 MatGetRow_SeqBAIJ, 1469 MatRestoreRow_SeqBAIJ, 1470 MatMult_SeqBAIJ_N, 1471 MatMultAdd_SeqBAIJ_N, 1472 MatMultTranspose_SeqBAIJ, 1473 MatMultTransposeAdd_SeqBAIJ, 1474 MatSolve_SeqBAIJ_N, 1475 0, 1476 0, 1477 0, 1478 MatLUFactor_SeqBAIJ, 1479 0, 1480 0, 1481 MatTranspose_SeqBAIJ, 1482 MatGetInfo_SeqBAIJ, 1483 MatEqual_SeqBAIJ, 1484 MatGetDiagonal_SeqBAIJ, 1485 MatDiagonalScale_SeqBAIJ, 1486 MatNorm_SeqBAIJ, 1487 0, 1488 MatAssemblyEnd_SeqBAIJ, 1489 0, 1490 MatSetOption_SeqBAIJ, 1491 MatZeroEntries_SeqBAIJ, 1492 MatZeroRows_SeqBAIJ, 1493 MatLUFactorSymbolic_SeqBAIJ, 1494 MatLUFactorNumeric_SeqBAIJ_N, 1495 0, 1496 0, 1497 MatSetUpPreallocation_SeqBAIJ, 1498 MatILUFactorSymbolic_SeqBAIJ, 1499 0, 1500 MatGetArray_SeqBAIJ, 1501 MatRestoreArray_SeqBAIJ, 1502 MatDuplicate_SeqBAIJ, 1503 0, 1504 0, 1505 MatILUFactor_SeqBAIJ, 1506 0, 1507 MatAXPY_SeqBAIJ, 1508 MatGetSubMatrices_SeqBAIJ, 1509 MatIncreaseOverlap_SeqBAIJ, 1510 MatGetValues_SeqBAIJ, 1511 0, 1512 MatPrintHelp_SeqBAIJ, 1513 MatScale_SeqBAIJ, 1514 0, 1515 0, 1516 0, 1517 MatGetBlockSize_SeqBAIJ, 1518 MatGetRowIJ_SeqBAIJ, 1519 MatRestoreRowIJ_SeqBAIJ, 1520 0, 1521 0, 1522 0, 1523 0, 1524 0, 1525 0, 1526 MatSetValuesBlocked_SeqBAIJ, 1527 MatGetSubMatrix_SeqBAIJ, 1528 MatDestroy_SeqBAIJ, 1529 MatView_SeqBAIJ, 1530 MatGetPetscMaps_Petsc, 1531 0, 1532 0, 1533 0, 1534 0, 1535 0, 1536 0, 1537 MatGetRowMax_SeqBAIJ, 1538 MatConvert_Basic}; 1539 1540 EXTERN_C_BEGIN 1541 #undef __FUNCT__ 1542 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 1543 int MatStoreValues_SeqBAIJ(Mat mat) 1544 { 1545 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1546 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1547 int ierr; 1548 1549 PetscFunctionBegin; 1550 if (aij->nonew != 1) { 1551 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1552 } 1553 1554 /* allocate space for values if not already there */ 1555 if (!aij->saved_values) { 1556 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1557 } 1558 1559 /* copy values over */ 1560 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1561 PetscFunctionReturn(0); 1562 } 1563 EXTERN_C_END 1564 1565 EXTERN_C_BEGIN 1566 #undef __FUNCT__ 1567 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 1568 int MatRetrieveValues_SeqBAIJ(Mat mat) 1569 { 1570 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1571 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1572 1573 PetscFunctionBegin; 1574 if (aij->nonew != 1) { 1575 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1576 } 1577 if (!aij->saved_values) { 1578 SETERRQ(1,"Must call MatStoreValues(A);first"); 1579 } 1580 1581 /* copy values over */ 1582 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1583 PetscFunctionReturn(0); 1584 } 1585 EXTERN_C_END 1586 1587 EXTERN_C_BEGIN 1588 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*); 1589 EXTERN_C_END 1590 1591 EXTERN_C_BEGIN 1592 #undef __FUNCT__ 1593 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 1594 int MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,int bs,int nz,int *nnz) 1595 { 1596 Mat_SeqBAIJ *b; 1597 int i,len,ierr,mbs,nbs,bs2,newbs = bs; 1598 PetscTruth flg; 1599 1600 PetscFunctionBegin; 1601 1602 B->preallocated = PETSC_TRUE; 1603 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 1604 if (nnz && newbs != bs) { 1605 SETERRQ(1,"Cannot change blocksize from command line if setting nnz"); 1606 } 1607 bs = newbs; 1608 1609 mbs = B->m/bs; 1610 nbs = B->n/bs; 1611 bs2 = bs*bs; 1612 1613 if (mbs*bs!=B->m || nbs*bs!=B->n) { 1614 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 1615 } 1616 1617 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1618 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1619 if (nnz) { 1620 for (i=0; i<mbs; i++) { 1621 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1622 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 1623 } 1624 } 1625 1626 b = (Mat_SeqBAIJ*)B->data; 1627 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1628 B->ops->solve = MatSolve_SeqBAIJ_Update; 1629 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 1630 if (!flg) { 1631 switch (bs) { 1632 case 1: 1633 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1634 B->ops->mult = MatMult_SeqBAIJ_1; 1635 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 1636 break; 1637 case 2: 1638 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1639 B->ops->mult = MatMult_SeqBAIJ_2; 1640 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 1641 break; 1642 case 3: 1643 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1644 B->ops->mult = MatMult_SeqBAIJ_3; 1645 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 1646 break; 1647 case 4: 1648 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1649 B->ops->mult = MatMult_SeqBAIJ_4; 1650 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 1651 break; 1652 case 5: 1653 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1654 B->ops->mult = MatMult_SeqBAIJ_5; 1655 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 1656 break; 1657 case 6: 1658 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 1659 B->ops->mult = MatMult_SeqBAIJ_6; 1660 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 1661 break; 1662 case 7: 1663 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 1664 B->ops->mult = MatMult_SeqBAIJ_7; 1665 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 1666 break; 1667 default: 1668 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 1669 B->ops->mult = MatMult_SeqBAIJ_N; 1670 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 1671 break; 1672 } 1673 } 1674 b->bs = bs; 1675 b->mbs = mbs; 1676 b->nbs = nbs; 1677 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1678 if (!nnz) { 1679 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1680 else if (nz <= 0) nz = 1; 1681 for (i=0; i<mbs; i++) b->imax[i] = nz; 1682 nz = nz*mbs; 1683 } else { 1684 nz = 0; 1685 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1686 } 1687 1688 /* allocate the matrix space */ 1689 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1690 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1691 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1692 b->j = (int*)(b->a + nz*bs2); 1693 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 1694 b->i = b->j + nz; 1695 b->singlemalloc = PETSC_TRUE; 1696 1697 b->i[0] = 0; 1698 for (i=1; i<mbs+1; i++) { 1699 b->i[i] = b->i[i-1] + b->imax[i-1]; 1700 } 1701 1702 /* b->ilen will count nonzeros in each block row so far. */ 1703 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1704 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1705 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1706 1707 b->bs = bs; 1708 b->bs2 = bs2; 1709 b->mbs = mbs; 1710 b->nz = 0; 1711 b->maxnz = nz*bs2; 1712 B->info.nz_unneeded = (PetscReal)b->maxnz; 1713 PetscFunctionReturn(0); 1714 } 1715 EXTERN_C_END 1716 1717 /*MC 1718 MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 1719 block sparse compressed row format. 1720 1721 Options Database Keys: 1722 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 1723 1724 Level: beginner 1725 1726 .seealso: MatCreateSeqBAIJ 1727 M*/ 1728 1729 EXTERN_C_BEGIN 1730 #undef __FUNCT__ 1731 #define __FUNCT__ "MatCreate_SeqBAIJ" 1732 int MatCreate_SeqBAIJ(Mat B) 1733 { 1734 int ierr,size; 1735 Mat_SeqBAIJ *b; 1736 1737 PetscFunctionBegin; 1738 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1739 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1740 1741 B->m = B->M = PetscMax(B->m,B->M); 1742 B->n = B->N = PetscMax(B->n,B->N); 1743 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 1744 B->data = (void*)b; 1745 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1746 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1747 B->factor = 0; 1748 B->lupivotthreshold = 1.0; 1749 B->mapping = 0; 1750 b->row = 0; 1751 b->col = 0; 1752 b->icol = 0; 1753 b->reallocs = 0; 1754 b->saved_values = 0; 1755 #if defined(PETSC_USE_MAT_SINGLE) 1756 b->setvalueslen = 0; 1757 b->setvaluescopy = PETSC_NULL; 1758 #endif 1759 1760 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 1761 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 1762 1763 b->sorted = PETSC_FALSE; 1764 b->roworiented = PETSC_TRUE; 1765 b->nonew = 0; 1766 b->diag = 0; 1767 b->solve_work = 0; 1768 b->mult_work = 0; 1769 B->spptr = 0; 1770 B->info.nz_unneeded = (PetscReal)b->maxnz; 1771 b->keepzeroedrows = PETSC_FALSE; 1772 b->xtoy = 0; 1773 b->XtoY = 0; 1774 1775 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1776 "MatStoreValues_SeqBAIJ", 1777 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1778 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1779 "MatRetrieveValues_SeqBAIJ", 1780 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1781 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1782 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1783 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1784 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 1785 "MatConvert_SeqBAIJ_SeqAIJ", 1786 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 1787 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 1788 "MatSeqBAIJSetPreallocation_SeqBAIJ", 1789 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 1790 PetscFunctionReturn(0); 1791 } 1792 EXTERN_C_END 1793 1794 #undef __FUNCT__ 1795 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 1796 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1797 { 1798 Mat C; 1799 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 1800 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1801 1802 PetscFunctionBegin; 1803 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1804 1805 *B = 0; 1806 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1807 ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr); 1808 c = (Mat_SeqBAIJ*)C->data; 1809 1810 c->bs = a->bs; 1811 c->bs2 = a->bs2; 1812 c->mbs = a->mbs; 1813 c->nbs = a->nbs; 1814 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1815 1816 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1817 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1818 for (i=0; i<mbs; i++) { 1819 c->imax[i] = a->imax[i]; 1820 c->ilen[i] = a->ilen[i]; 1821 } 1822 1823 /* allocate the matrix space */ 1824 c->singlemalloc = PETSC_TRUE; 1825 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1826 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1827 c->j = (int*)(c->a + nz*bs2); 1828 c->i = c->j + nz; 1829 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1830 if (mbs > 0) { 1831 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1832 if (cpvalues == MAT_COPY_VALUES) { 1833 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1834 } else { 1835 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1836 } 1837 } 1838 1839 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1840 c->sorted = a->sorted; 1841 c->roworiented = a->roworiented; 1842 c->nonew = a->nonew; 1843 1844 if (a->diag) { 1845 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1846 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1847 for (i=0; i<mbs; i++) { 1848 c->diag[i] = a->diag[i]; 1849 } 1850 } else c->diag = 0; 1851 c->nz = a->nz; 1852 c->maxnz = a->maxnz; 1853 c->solve_work = 0; 1854 C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1855 c->mult_work = 0; 1856 C->preallocated = PETSC_TRUE; 1857 C->assembled = PETSC_TRUE; 1858 *B = C; 1859 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1860 PetscFunctionReturn(0); 1861 } 1862 1863 EXTERN_C_BEGIN 1864 #undef __FUNCT__ 1865 #define __FUNCT__ "MatLoad_SeqBAIJ" 1866 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A) 1867 { 1868 Mat_SeqBAIJ *a; 1869 Mat B; 1870 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1871 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1872 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1873 int *masked,nmask,tmp,bs2,ishift; 1874 PetscScalar *aa; 1875 MPI_Comm comm = ((PetscObject)viewer)->comm; 1876 1877 PetscFunctionBegin; 1878 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1879 bs2 = bs*bs; 1880 1881 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1882 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1883 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1884 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1885 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1886 M = header[1]; N = header[2]; nz = header[3]; 1887 1888 if (header[3] < 0) { 1889 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 1890 } 1891 1892 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1893 1894 /* 1895 This code adds extra rows to make sure the number of rows is 1896 divisible by the blocksize 1897 */ 1898 mbs = M/bs; 1899 extra_rows = bs - M + bs*(mbs); 1900 if (extra_rows == bs) extra_rows = 0; 1901 else mbs++; 1902 if (extra_rows) { 1903 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1904 } 1905 1906 /* read in row lengths */ 1907 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1908 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1909 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1910 1911 /* read in column indices */ 1912 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1913 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1914 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1915 1916 /* loop over row lengths determining block row lengths */ 1917 ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 1918 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1919 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1920 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1921 masked = mask + mbs; 1922 rowcount = 0; nzcount = 0; 1923 for (i=0; i<mbs; i++) { 1924 nmask = 0; 1925 for (j=0; j<bs; j++) { 1926 kmax = rowlengths[rowcount]; 1927 for (k=0; k<kmax; k++) { 1928 tmp = jj[nzcount++]/bs; 1929 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1930 } 1931 rowcount++; 1932 } 1933 browlengths[i] += nmask; 1934 /* zero out the mask elements we set */ 1935 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1936 } 1937 1938 /* create our matrix */ 1939 ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B); 1940 ierr = MatSetType(B,type);CHKERRQ(ierr); 1941 ierr = MatSeqBAIJSetPreallocation(B,bs,0,browlengths);CHKERRQ(ierr); 1942 a = (Mat_SeqBAIJ*)B->data; 1943 1944 /* set matrix "i" values */ 1945 a->i[0] = 0; 1946 for (i=1; i<= mbs; i++) { 1947 a->i[i] = a->i[i-1] + browlengths[i-1]; 1948 a->ilen[i-1] = browlengths[i-1]; 1949 } 1950 a->nz = 0; 1951 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 1952 1953 /* read in nonzero values */ 1954 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1955 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1956 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1957 1958 /* set "a" and "j" values into matrix */ 1959 nzcount = 0; jcount = 0; 1960 for (i=0; i<mbs; i++) { 1961 nzcountb = nzcount; 1962 nmask = 0; 1963 for (j=0; j<bs; j++) { 1964 kmax = rowlengths[i*bs+j]; 1965 for (k=0; k<kmax; k++) { 1966 tmp = jj[nzcount++]/bs; 1967 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1968 } 1969 } 1970 /* sort the masked values */ 1971 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1972 1973 /* set "j" values into matrix */ 1974 maskcount = 1; 1975 for (j=0; j<nmask; j++) { 1976 a->j[jcount++] = masked[j]; 1977 mask[masked[j]] = maskcount++; 1978 } 1979 /* set "a" values into matrix */ 1980 ishift = bs2*a->i[i]; 1981 for (j=0; j<bs; j++) { 1982 kmax = rowlengths[i*bs+j]; 1983 for (k=0; k<kmax; k++) { 1984 tmp = jj[nzcountb]/bs ; 1985 block = mask[tmp] - 1; 1986 point = jj[nzcountb] - bs*tmp; 1987 idx = ishift + bs2*block + j + bs*point; 1988 a->a[idx] = (MatScalar)aa[nzcountb++]; 1989 } 1990 } 1991 /* zero out the mask elements we set */ 1992 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1993 } 1994 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1995 1996 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1997 ierr = PetscFree(browlengths);CHKERRQ(ierr); 1998 ierr = PetscFree(aa);CHKERRQ(ierr); 1999 ierr = PetscFree(jj);CHKERRQ(ierr); 2000 ierr = PetscFree(mask);CHKERRQ(ierr); 2001 2002 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2003 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2004 ierr = MatView_Private(B);CHKERRQ(ierr); 2005 2006 *A = B; 2007 PetscFunctionReturn(0); 2008 } 2009 EXTERN_C_END 2010 2011 #undef __FUNCT__ 2012 #define __FUNCT__ "MatCreateSeqBAIJ" 2013 /*@C 2014 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2015 compressed row) format. For good matrix assembly performance the 2016 user should preallocate the matrix storage by setting the parameter nz 2017 (or the array nnz). By setting these parameters accurately, performance 2018 during matrix assembly can be increased by more than a factor of 50. 2019 2020 Collective on MPI_Comm 2021 2022 Input Parameters: 2023 + comm - MPI communicator, set to PETSC_COMM_SELF 2024 . bs - size of block 2025 . m - number of rows 2026 . n - number of columns 2027 . nz - number of nonzero blocks per block row (same for all rows) 2028 - nnz - array containing the number of nonzero blocks in the various block rows 2029 (possibly different for each block row) or PETSC_NULL 2030 2031 Output Parameter: 2032 . A - the matrix 2033 2034 Options Database Keys: 2035 . -mat_no_unroll - uses code that does not unroll the loops in the 2036 block calculations (much slower) 2037 . -mat_block_size - size of the blocks to use 2038 2039 Level: intermediate 2040 2041 Notes: 2042 A nonzero block is any block that as 1 or more nonzeros in it 2043 2044 The block AIJ format is fully compatible with standard Fortran 77 2045 storage. That is, the stored row and column indices can begin at 2046 either one (as in Fortran) or zero. See the users' manual for details. 2047 2048 Specify the preallocated storage with either nz or nnz (not both). 2049 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2050 allocation. For additional details, see the users manual chapter on 2051 matrices. 2052 2053 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2054 @*/ 2055 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A) 2056 { 2057 int ierr; 2058 2059 PetscFunctionBegin; 2060 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 2061 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2062 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 2063 PetscFunctionReturn(0); 2064 } 2065 2066 #undef __FUNCT__ 2067 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2068 /*@C 2069 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2070 per row in the matrix. For good matrix assembly performance the 2071 user should preallocate the matrix storage by setting the parameter nz 2072 (or the array nnz). By setting these parameters accurately, performance 2073 during matrix assembly can be increased by more than a factor of 50. 2074 2075 Collective on MPI_Comm 2076 2077 Input Parameters: 2078 + A - the matrix 2079 . bs - size of block 2080 . nz - number of block nonzeros per block row (same for all rows) 2081 - nnz - array containing the number of block nonzeros in the various block rows 2082 (possibly different for each block row) or PETSC_NULL 2083 2084 Options Database Keys: 2085 . -mat_no_unroll - uses code that does not unroll the loops in the 2086 block calculations (much slower) 2087 . -mat_block_size - size of the blocks to use 2088 2089 Level: intermediate 2090 2091 Notes: 2092 The block AIJ format is fully compatible with standard Fortran 77 2093 storage. That is, the stored row and column indices can begin at 2094 either one (as in Fortran) or zero. See the users' manual for details. 2095 2096 Specify the preallocated storage with either nz or nnz (not both). 2097 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2098 allocation. For additional details, see the users manual chapter on 2099 matrices. 2100 2101 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2102 @*/ 2103 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) 2104 { 2105 int ierr,(*f)(Mat,int,int,const int[]); 2106 2107 PetscFunctionBegin; 2108 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2109 if (f) { 2110 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2111 } 2112 PetscFunctionReturn(0); 2113 } 2114