1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 #include "petscsys.h" /*I "petscmat.h" I*/ 11 12 /* 13 Special version for Fun3d sequential benchmark 14 */ 15 #if defined(PETSC_HAVE_FORTRAN_CAPS) 16 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 17 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 18 #define matsetvaluesblocked4_ matsetvaluesblocked4 19 #endif 20 21 EXTERN_C_BEGIN 22 #undef __FUNCT__ 23 #define __FUNCT__ "matsetvaluesblocked4_" 24 void matsetvaluesblocked4_(Mat *AA,int *mm,const int im[],int *nn,const int in[],const PetscScalar v[]) 25 { 26 Mat A = *AA; 27 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 28 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 29 int *ai=a->i,*ailen=a->ilen; 30 int *aj=a->j,stepval; 31 const PetscScalar *value = v; 32 MatScalar *ap,*aa = a->a,*bap; 33 34 PetscFunctionBegin; 35 stepval = (n-1)*4; 36 for (k=0; k<m; k++) { /* loop over added rows */ 37 row = im[k]; 38 rp = aj + ai[row]; 39 ap = aa + 16*ai[row]; 40 nrow = ailen[row]; 41 low = 0; 42 for (l=0; l<n; l++) { /* loop over added columns */ 43 col = in[l]; 44 value = v + k*(stepval+4)*4 + l*4; 45 low = 0; high = nrow; 46 while (high-low > 7) { 47 t = (low+high)/2; 48 if (rp[t] > col) high = t; 49 else low = t; 50 } 51 for (i=low; i<high; i++) { 52 if (rp[i] > col) break; 53 if (rp[i] == col) { 54 bap = ap + 16*i; 55 for (ii=0; ii<4; ii++,value+=stepval) { 56 for (jj=ii; jj<16; jj+=4) { 57 bap[jj] += *value++; 58 } 59 } 60 goto noinsert2; 61 } 62 } 63 N = nrow++ - 1; 64 /* shift up all the later entries in this row */ 65 for (ii=N; ii>=i; ii--) { 66 rp[ii+1] = rp[ii]; 67 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 68 } 69 if (N >= i) { 70 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 71 } 72 rp[i] = col; 73 bap = ap + 16*i; 74 for (ii=0; ii<4; ii++,value+=stepval) { 75 for (jj=ii; jj<16; jj+=4) { 76 bap[jj] = *value++; 77 } 78 } 79 noinsert2:; 80 low = i; 81 } 82 ailen[row] = nrow; 83 } 84 } 85 EXTERN_C_END 86 87 #if defined(PETSC_HAVE_FORTRAN_CAPS) 88 #define matsetvalues4_ MATSETVALUES4 89 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 90 #define matsetvalues4_ matsetvalues4 91 #endif 92 93 EXTERN_C_BEGIN 94 #undef __FUNCT__ 95 #define __FUNCT__ "MatSetValues4_" 96 void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v) 97 { 98 Mat A = *AA; 99 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 100 int *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 101 int *ai=a->i,*ailen=a->ilen; 102 int *aj=a->j,brow,bcol; 103 int ridx,cidx; 104 MatScalar *ap,value,*aa=a->a,*bap; 105 106 PetscFunctionBegin; 107 for (k=0; k<m; k++) { /* loop over added rows */ 108 row = im[k]; brow = row/4; 109 rp = aj + ai[brow]; 110 ap = aa + 16*ai[brow]; 111 nrow = ailen[brow]; 112 low = 0; 113 for (l=0; l<n; l++) { /* loop over added columns */ 114 col = in[l]; bcol = col/4; 115 ridx = row % 4; cidx = col % 4; 116 value = v[l + k*n]; 117 low = 0; high = nrow; 118 while (high-low > 7) { 119 t = (low+high)/2; 120 if (rp[t] > bcol) high = t; 121 else low = t; 122 } 123 for (i=low; i<high; i++) { 124 if (rp[i] > bcol) break; 125 if (rp[i] == bcol) { 126 bap = ap + 16*i + 4*cidx + ridx; 127 *bap += value; 128 goto noinsert1; 129 } 130 } 131 N = nrow++ - 1; 132 /* shift up all the later entries in this row */ 133 for (ii=N; ii>=i; ii--) { 134 rp[ii+1] = rp[ii]; 135 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 136 } 137 if (N>=i) { 138 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 139 } 140 rp[i] = bcol; 141 ap[16*i + 4*cidx + ridx] = value; 142 noinsert1:; 143 low = i; 144 } 145 ailen[brow] = nrow; 146 } 147 } 148 EXTERN_C_END 149 150 /* UGLY, ugly, ugly 151 When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 152 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 153 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 154 converts the entries into single precision and then calls ..._MatScalar() to put them 155 into the single precision data structures. 156 */ 157 #if defined(PETSC_USE_MAT_SINGLE) 158 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode); 159 #else 160 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 161 #endif 162 163 #define CHUNKSIZE 10 164 165 /* 166 Checks for missing diagonals 167 */ 168 #undef __FUNCT__ 169 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 170 int MatMissingDiagonal_SeqBAIJ(Mat A) 171 { 172 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 173 int *diag,*jj = a->j,i,ierr; 174 175 PetscFunctionBegin; 176 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 177 diag = a->diag; 178 for (i=0; i<a->mbs; i++) { 179 if (jj[diag[i]] != i) { 180 SETERRQ1(1,"Matrix is missing diagonal number %d",i); 181 } 182 } 183 PetscFunctionReturn(0); 184 } 185 186 #undef __FUNCT__ 187 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 188 int MatMarkDiagonal_SeqBAIJ(Mat A) 189 { 190 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 191 int i,j,*diag,m = a->mbs,ierr; 192 193 PetscFunctionBegin; 194 if (a->diag) PetscFunctionReturn(0); 195 196 ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 197 PetscLogObjectMemory(A,(m+1)*sizeof(int)); 198 for (i=0; i<m; i++) { 199 diag[i] = a->i[i+1]; 200 for (j=a->i[i]; j<a->i[i+1]; j++) { 201 if (a->j[j] == i) { 202 diag[i] = j; 203 break; 204 } 205 } 206 } 207 a->diag = diag; 208 PetscFunctionReturn(0); 209 } 210 211 212 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 213 214 #undef __FUNCT__ 215 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 216 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 217 { 218 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 219 int ierr,n = a->mbs,i; 220 221 PetscFunctionBegin; 222 *nn = n; 223 if (!ia) PetscFunctionReturn(0); 224 if (symmetric) { 225 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 226 } else if (oshift == 1) { 227 /* temporarily add 1 to i and j indices */ 228 int nz = a->i[n]; 229 for (i=0; i<nz; i++) a->j[i]++; 230 for (i=0; i<n+1; i++) a->i[i]++; 231 *ia = a->i; *ja = a->j; 232 } else { 233 *ia = a->i; *ja = a->j; 234 } 235 236 PetscFunctionReturn(0); 237 } 238 239 #undef __FUNCT__ 240 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 241 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 242 { 243 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 244 int i,n = a->mbs,ierr; 245 246 PetscFunctionBegin; 247 if (!ia) PetscFunctionReturn(0); 248 if (symmetric) { 249 ierr = PetscFree(*ia);CHKERRQ(ierr); 250 ierr = PetscFree(*ja);CHKERRQ(ierr); 251 } else if (oshift == 1) { 252 int nz = a->i[n]-1; 253 for (i=0; i<nz; i++) a->j[i]--; 254 for (i=0; i<n+1; i++) a->i[i]--; 255 } 256 PetscFunctionReturn(0); 257 } 258 259 #undef __FUNCT__ 260 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 261 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 262 { 263 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 264 265 PetscFunctionBegin; 266 *bs = baij->bs; 267 PetscFunctionReturn(0); 268 } 269 270 271 #undef __FUNCT__ 272 #define __FUNCT__ "MatDestroy_SeqBAIJ" 273 int MatDestroy_SeqBAIJ(Mat A) 274 { 275 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 276 int ierr; 277 278 PetscFunctionBegin; 279 #if defined(PETSC_USE_LOG) 280 PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 281 #endif 282 ierr = PetscFree(a->a);CHKERRQ(ierr); 283 if (!a->singlemalloc) { 284 ierr = PetscFree(a->i);CHKERRQ(ierr); 285 ierr = PetscFree(a->j);CHKERRQ(ierr); 286 } 287 if (a->row) { 288 ierr = ISDestroy(a->row);CHKERRQ(ierr); 289 } 290 if (a->col) { 291 ierr = ISDestroy(a->col);CHKERRQ(ierr); 292 } 293 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 294 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 295 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 296 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 297 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 298 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 299 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 300 #if defined(PETSC_USE_MAT_SINGLE) 301 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 302 #endif 303 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 304 305 ierr = PetscFree(a);CHKERRQ(ierr); 306 PetscFunctionReturn(0); 307 } 308 309 #undef __FUNCT__ 310 #define __FUNCT__ "MatSetOption_SeqBAIJ" 311 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 312 { 313 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 314 315 PetscFunctionBegin; 316 switch (op) { 317 case MAT_ROW_ORIENTED: 318 a->roworiented = PETSC_TRUE; 319 break; 320 case MAT_COLUMN_ORIENTED: 321 a->roworiented = PETSC_FALSE; 322 break; 323 case MAT_COLUMNS_SORTED: 324 a->sorted = PETSC_TRUE; 325 break; 326 case MAT_COLUMNS_UNSORTED: 327 a->sorted = PETSC_FALSE; 328 break; 329 case MAT_KEEP_ZEROED_ROWS: 330 a->keepzeroedrows = PETSC_TRUE; 331 break; 332 case MAT_NO_NEW_NONZERO_LOCATIONS: 333 a->nonew = 1; 334 break; 335 case MAT_NEW_NONZERO_LOCATION_ERR: 336 a->nonew = -1; 337 break; 338 case MAT_NEW_NONZERO_ALLOCATION_ERR: 339 a->nonew = -2; 340 break; 341 case MAT_YES_NEW_NONZERO_LOCATIONS: 342 a->nonew = 0; 343 break; 344 case MAT_ROWS_SORTED: 345 case MAT_ROWS_UNSORTED: 346 case MAT_YES_NEW_DIAGONALS: 347 case MAT_IGNORE_OFF_PROC_ENTRIES: 348 case MAT_USE_HASH_TABLE: 349 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 350 break; 351 case MAT_NO_NEW_DIAGONALS: 352 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 353 case MAT_SYMMETRIC: 354 case MAT_STRUCTURALLY_SYMMETRIC: 355 break; 356 default: 357 SETERRQ(PETSC_ERR_SUP,"unknown option"); 358 } 359 PetscFunctionReturn(0); 360 } 361 362 #undef __FUNCT__ 363 #define __FUNCT__ "MatGetRow_SeqBAIJ" 364 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 365 { 366 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 367 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 368 MatScalar *aa,*aa_i; 369 PetscScalar *v_i; 370 371 PetscFunctionBegin; 372 bs = a->bs; 373 ai = a->i; 374 aj = a->j; 375 aa = a->a; 376 bs2 = a->bs2; 377 378 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d out of range", row); 379 380 bn = row/bs; /* Block number */ 381 bp = row % bs; /* Block Position */ 382 M = ai[bn+1] - ai[bn]; 383 *nz = bs*M; 384 385 if (v) { 386 *v = 0; 387 if (*nz) { 388 ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 389 for (i=0; i<M; i++) { /* for each block in the block row */ 390 v_i = *v + i*bs; 391 aa_i = aa + bs2*(ai[bn] + i); 392 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 393 } 394 } 395 } 396 397 if (idx) { 398 *idx = 0; 399 if (*nz) { 400 ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 401 for (i=0; i<M; i++) { /* for each block in the block row */ 402 idx_i = *idx + i*bs; 403 itmp = bs*aj[ai[bn] + i]; 404 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 405 } 406 } 407 } 408 PetscFunctionReturn(0); 409 } 410 411 #undef __FUNCT__ 412 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 413 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 414 { 415 int ierr; 416 417 PetscFunctionBegin; 418 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 419 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 420 PetscFunctionReturn(0); 421 } 422 423 #undef __FUNCT__ 424 #define __FUNCT__ "MatTranspose_SeqBAIJ" 425 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 426 { 427 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 428 Mat C; 429 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 430 int *rows,*cols,bs2=a->bs2; 431 PetscScalar *array; 432 433 PetscFunctionBegin; 434 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 435 ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 436 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 437 438 #if defined(PETSC_USE_MAT_SINGLE) 439 ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 440 for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 441 #else 442 array = a->a; 443 #endif 444 445 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 446 ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 447 ierr = PetscFree(col);CHKERRQ(ierr); 448 ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 449 cols = rows + bs; 450 for (i=0; i<mbs; i++) { 451 cols[0] = i*bs; 452 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 453 len = ai[i+1] - ai[i]; 454 for (j=0; j<len; j++) { 455 rows[0] = (*aj++)*bs; 456 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 457 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 458 array += bs2; 459 } 460 } 461 ierr = PetscFree(rows);CHKERRQ(ierr); 462 #if defined(PETSC_USE_MAT_SINGLE) 463 ierr = PetscFree(array); 464 #endif 465 466 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 467 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 468 469 if (B) { 470 *B = C; 471 } else { 472 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 473 } 474 PetscFunctionReturn(0); 475 } 476 477 #undef __FUNCT__ 478 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 479 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 480 { 481 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 482 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 483 PetscScalar *aa; 484 FILE *file; 485 486 PetscFunctionBegin; 487 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 488 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 489 col_lens[0] = MAT_FILE_COOKIE; 490 491 col_lens[1] = A->m; 492 col_lens[2] = A->n; 493 col_lens[3] = a->nz*bs2; 494 495 /* store lengths of each row and write (including header) to file */ 496 count = 0; 497 for (i=0; i<a->mbs; i++) { 498 for (j=0; j<bs; j++) { 499 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 500 } 501 } 502 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 503 ierr = PetscFree(col_lens);CHKERRQ(ierr); 504 505 /* store column indices (zero start index) */ 506 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 507 count = 0; 508 for (i=0; i<a->mbs; i++) { 509 for (j=0; j<bs; j++) { 510 for (k=a->i[i]; k<a->i[i+1]; k++) { 511 for (l=0; l<bs; l++) { 512 jj[count++] = bs*a->j[k] + l; 513 } 514 } 515 } 516 } 517 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 518 ierr = PetscFree(jj);CHKERRQ(ierr); 519 520 /* store nonzero values */ 521 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 522 count = 0; 523 for (i=0; i<a->mbs; i++) { 524 for (j=0; j<bs; j++) { 525 for (k=a->i[i]; k<a->i[i+1]; k++) { 526 for (l=0; l<bs; l++) { 527 aa[count++] = a->a[bs2*k + l*bs + j]; 528 } 529 } 530 } 531 } 532 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 533 ierr = PetscFree(aa);CHKERRQ(ierr); 534 535 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 536 if (file) { 537 fprintf(file,"-matload_block_size %d\n",a->bs); 538 } 539 PetscFunctionReturn(0); 540 } 541 542 #undef __FUNCT__ 543 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 544 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 545 { 546 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 547 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 548 PetscViewerFormat format; 549 550 PetscFunctionBegin; 551 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 552 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 553 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 554 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 555 Mat aij; 556 ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr); 557 ierr = MatView(aij,viewer);CHKERRQ(ierr); 558 ierr = MatDestroy(aij);CHKERRQ(ierr); 559 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 560 PetscFunctionReturn(0); 561 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 562 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 563 for (i=0; i<a->mbs; i++) { 564 for (j=0; j<bs; j++) { 565 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 566 for (k=a->i[i]; k<a->i[i+1]; k++) { 567 for (l=0; l<bs; l++) { 568 #if defined(PETSC_USE_COMPLEX) 569 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 570 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l, 571 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 572 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 573 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l, 574 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 575 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 576 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 577 } 578 #else 579 if (a->a[bs2*k + l*bs + j] != 0.0) { 580 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 581 } 582 #endif 583 } 584 } 585 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 586 } 587 } 588 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 589 } else { 590 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 591 for (i=0; i<a->mbs; i++) { 592 for (j=0; j<bs; j++) { 593 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 594 for (k=a->i[i]; k<a->i[i+1]; k++) { 595 for (l=0; l<bs; l++) { 596 #if defined(PETSC_USE_COMPLEX) 597 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 598 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 599 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 600 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 601 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 602 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 603 } else { 604 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 605 } 606 #else 607 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 608 #endif 609 } 610 } 611 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 612 } 613 } 614 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 615 } 616 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 617 PetscFunctionReturn(0); 618 } 619 620 #undef __FUNCT__ 621 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 622 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 623 { 624 Mat A = (Mat) Aa; 625 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 626 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 627 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 628 MatScalar *aa; 629 PetscViewer viewer; 630 631 PetscFunctionBegin; 632 633 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 634 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 635 636 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 637 638 /* loop over matrix elements drawing boxes */ 639 color = PETSC_DRAW_BLUE; 640 for (i=0,row=0; i<mbs; i++,row+=bs) { 641 for (j=a->i[i]; j<a->i[i+1]; j++) { 642 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 643 x_l = a->j[j]*bs; x_r = x_l + 1.0; 644 aa = a->a + j*bs2; 645 for (k=0; k<bs; k++) { 646 for (l=0; l<bs; l++) { 647 if (PetscRealPart(*aa++) >= 0.) continue; 648 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 649 } 650 } 651 } 652 } 653 color = PETSC_DRAW_CYAN; 654 for (i=0,row=0; i<mbs; i++,row+=bs) { 655 for (j=a->i[i]; j<a->i[i+1]; j++) { 656 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 657 x_l = a->j[j]*bs; x_r = x_l + 1.0; 658 aa = a->a + j*bs2; 659 for (k=0; k<bs; k++) { 660 for (l=0; l<bs; l++) { 661 if (PetscRealPart(*aa++) != 0.) continue; 662 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 663 } 664 } 665 } 666 } 667 668 color = PETSC_DRAW_RED; 669 for (i=0,row=0; i<mbs; i++,row+=bs) { 670 for (j=a->i[i]; j<a->i[i+1]; j++) { 671 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 672 x_l = a->j[j]*bs; x_r = x_l + 1.0; 673 aa = a->a + j*bs2; 674 for (k=0; k<bs; k++) { 675 for (l=0; l<bs; l++) { 676 if (PetscRealPart(*aa++) <= 0.) continue; 677 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 678 } 679 } 680 } 681 } 682 PetscFunctionReturn(0); 683 } 684 685 #undef __FUNCT__ 686 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 687 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 688 { 689 int ierr; 690 PetscReal xl,yl,xr,yr,w,h; 691 PetscDraw draw; 692 PetscTruth isnull; 693 694 PetscFunctionBegin; 695 696 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 697 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 698 699 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 700 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 701 xr += w; yr += h; xl = -w; yl = -h; 702 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 703 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 704 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 705 PetscFunctionReturn(0); 706 } 707 708 #undef __FUNCT__ 709 #define __FUNCT__ "MatView_SeqBAIJ" 710 int MatView_SeqBAIJ(Mat A,PetscViewer viewer) 711 { 712 int ierr; 713 PetscTruth issocket,isascii,isbinary,isdraw; 714 715 PetscFunctionBegin; 716 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 717 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 718 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 719 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 720 if (issocket) { 721 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 722 } else if (isascii){ 723 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 724 } else if (isbinary) { 725 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 726 } else if (isdraw) { 727 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 728 } else { 729 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 730 } 731 PetscFunctionReturn(0); 732 } 733 734 735 #undef __FUNCT__ 736 #define __FUNCT__ "MatGetValues_SeqBAIJ" 737 int MatGetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[]) 738 { 739 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 740 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 741 int *ai = a->i,*ailen = a->ilen; 742 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 743 MatScalar *ap,*aa = a->a,zero = 0.0; 744 745 PetscFunctionBegin; 746 for (k=0; k<m; k++) { /* loop over rows */ 747 row = im[k]; brow = row/bs; 748 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 749 if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d too large", row); 750 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 751 nrow = ailen[brow]; 752 for (l=0; l<n; l++) { /* loop over columns */ 753 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 754 if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %d too large", in[l]); 755 col = in[l] ; 756 bcol = col/bs; 757 cidx = col%bs; 758 ridx = row%bs; 759 high = nrow; 760 low = 0; /* assume unsorted */ 761 while (high-low > 5) { 762 t = (low+high)/2; 763 if (rp[t] > bcol) high = t; 764 else low = t; 765 } 766 for (i=low; i<high; i++) { 767 if (rp[i] > bcol) break; 768 if (rp[i] == bcol) { 769 *v++ = ap[bs2*i+bs*cidx+ridx]; 770 goto finished; 771 } 772 } 773 *v++ = zero; 774 finished:; 775 } 776 } 777 PetscFunctionReturn(0); 778 } 779 780 #if defined(PETSC_USE_MAT_SINGLE) 781 #undef __FUNCT__ 782 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 783 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv) 784 { 785 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 786 int ierr,i,N = m*n*b->bs2; 787 MatScalar *vsingle; 788 789 PetscFunctionBegin; 790 if (N > b->setvalueslen) { 791 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 792 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 793 b->setvalueslen = N; 794 } 795 vsingle = b->setvaluescopy; 796 for (i=0; i<N; i++) { 797 vsingle[i] = v[i]; 798 } 799 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 800 PetscFunctionReturn(0); 801 } 802 #endif 803 804 805 #undef __FUNCT__ 806 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 807 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode is) 808 { 809 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 810 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 811 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 812 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 813 PetscTruth roworiented=a->roworiented; 814 const MatScalar *value = v; 815 MatScalar *ap,*aa = a->a,*bap; 816 817 PetscFunctionBegin; 818 if (roworiented) { 819 stepval = (n-1)*bs; 820 } else { 821 stepval = (m-1)*bs; 822 } 823 for (k=0; k<m; k++) { /* loop over added rows */ 824 row = im[k]; 825 if (row < 0) continue; 826 #if defined(PETSC_USE_BOPT_g) 827 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1); 828 #endif 829 rp = aj + ai[row]; 830 ap = aa + bs2*ai[row]; 831 rmax = imax[row]; 832 nrow = ailen[row]; 833 low = 0; 834 for (l=0; l<n; l++) { /* loop over added columns */ 835 if (in[l] < 0) continue; 836 #if defined(PETSC_USE_BOPT_g) 837 if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],a->nbs-1); 838 #endif 839 col = in[l]; 840 if (roworiented) { 841 value = v + k*(stepval+bs)*bs + l*bs; 842 } else { 843 value = v + l*(stepval+bs)*bs + k*bs; 844 } 845 if (!sorted) low = 0; high = nrow; 846 while (high-low > 7) { 847 t = (low+high)/2; 848 if (rp[t] > col) high = t; 849 else low = t; 850 } 851 for (i=low; i<high; i++) { 852 if (rp[i] > col) break; 853 if (rp[i] == col) { 854 bap = ap + bs2*i; 855 if (roworiented) { 856 if (is == ADD_VALUES) { 857 for (ii=0; ii<bs; ii++,value+=stepval) { 858 for (jj=ii; jj<bs2; jj+=bs) { 859 bap[jj] += *value++; 860 } 861 } 862 } else { 863 for (ii=0; ii<bs; ii++,value+=stepval) { 864 for (jj=ii; jj<bs2; jj+=bs) { 865 bap[jj] = *value++; 866 } 867 } 868 } 869 } else { 870 if (is == ADD_VALUES) { 871 for (ii=0; ii<bs; ii++,value+=stepval) { 872 for (jj=0; jj<bs; jj++) { 873 *bap++ += *value++; 874 } 875 } 876 } else { 877 for (ii=0; ii<bs; ii++,value+=stepval) { 878 for (jj=0; jj<bs; jj++) { 879 *bap++ = *value++; 880 } 881 } 882 } 883 } 884 goto noinsert2; 885 } 886 } 887 if (nonew == 1) goto noinsert2; 888 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 889 if (nrow >= rmax) { 890 /* there is no extra room in row, therefore enlarge */ 891 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 892 MatScalar *new_a; 893 894 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 895 896 /* malloc new storage space */ 897 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 898 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 899 new_j = (int*)(new_a + bs2*new_nz); 900 new_i = new_j + new_nz; 901 902 /* copy over old data into new slots */ 903 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 904 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 905 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 906 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 907 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 908 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 909 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 910 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 911 /* free up old matrix storage */ 912 ierr = PetscFree(a->a);CHKERRQ(ierr); 913 if (!a->singlemalloc) { 914 ierr = PetscFree(a->i);CHKERRQ(ierr); 915 ierr = PetscFree(a->j);CHKERRQ(ierr); 916 } 917 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 918 a->singlemalloc = PETSC_TRUE; 919 920 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 921 rmax = imax[row] = imax[row] + CHUNKSIZE; 922 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 923 a->maxnz += bs2*CHUNKSIZE; 924 a->reallocs++; 925 a->nz++; 926 } 927 N = nrow++ - 1; 928 /* shift up all the later entries in this row */ 929 for (ii=N; ii>=i; ii--) { 930 rp[ii+1] = rp[ii]; 931 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 932 } 933 if (N >= i) { 934 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 935 } 936 rp[i] = col; 937 bap = ap + bs2*i; 938 if (roworiented) { 939 for (ii=0; ii<bs; ii++,value+=stepval) { 940 for (jj=ii; jj<bs2; jj+=bs) { 941 bap[jj] = *value++; 942 } 943 } 944 } else { 945 for (ii=0; ii<bs; ii++,value+=stepval) { 946 for (jj=0; jj<bs; jj++) { 947 *bap++ = *value++; 948 } 949 } 950 } 951 noinsert2:; 952 low = i; 953 } 954 ailen[row] = nrow; 955 } 956 PetscFunctionReturn(0); 957 } 958 959 #undef __FUNCT__ 960 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 961 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 962 { 963 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 964 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 965 int m = A->m,*ip,N,*ailen = a->ilen; 966 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 967 MatScalar *aa = a->a,*ap; 968 969 PetscFunctionBegin; 970 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 971 972 if (m) rmax = ailen[0]; 973 for (i=1; i<mbs; i++) { 974 /* move each row back by the amount of empty slots (fshift) before it*/ 975 fshift += imax[i-1] - ailen[i-1]; 976 rmax = PetscMax(rmax,ailen[i]); 977 if (fshift) { 978 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 979 N = ailen[i]; 980 for (j=0; j<N; j++) { 981 ip[j-fshift] = ip[j]; 982 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 983 } 984 } 985 ai[i] = ai[i-1] + ailen[i-1]; 986 } 987 if (mbs) { 988 fshift += imax[mbs-1] - ailen[mbs-1]; 989 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 990 } 991 /* reset ilen and imax for each row */ 992 for (i=0; i<mbs; i++) { 993 ailen[i] = imax[i] = ai[i+1] - ai[i]; 994 } 995 a->nz = ai[mbs]; 996 997 /* diagonals may have moved, so kill the diagonal pointers */ 998 if (fshift && a->diag) { 999 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1000 PetscLogObjectMemory(A,-(mbs+1)*sizeof(int)); 1001 a->diag = 0; 1002 } 1003 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2); 1004 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs); 1005 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 1006 a->reallocs = 0; 1007 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1008 1009 PetscFunctionReturn(0); 1010 } 1011 1012 1013 1014 /* 1015 This function returns an array of flags which indicate the locations of contiguous 1016 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1017 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1018 Assume: sizes should be long enough to hold all the values. 1019 */ 1020 #undef __FUNCT__ 1021 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1022 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 1023 { 1024 int i,j,k,row; 1025 PetscTruth flg; 1026 1027 PetscFunctionBegin; 1028 for (i=0,j=0; i<n; j++) { 1029 row = idx[i]; 1030 if (row%bs!=0) { /* Not the begining of a block */ 1031 sizes[j] = 1; 1032 i++; 1033 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1034 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1035 i++; 1036 } else { /* Begining of the block, so check if the complete block exists */ 1037 flg = PETSC_TRUE; 1038 for (k=1; k<bs; k++) { 1039 if (row+k != idx[i+k]) { /* break in the block */ 1040 flg = PETSC_FALSE; 1041 break; 1042 } 1043 } 1044 if (flg == PETSC_TRUE) { /* No break in the bs */ 1045 sizes[j] = bs; 1046 i+= bs; 1047 } else { 1048 sizes[j] = 1; 1049 i++; 1050 } 1051 } 1052 } 1053 *bs_max = j; 1054 PetscFunctionReturn(0); 1055 } 1056 1057 #undef __FUNCT__ 1058 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 1059 int MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag) 1060 { 1061 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1062 int ierr,i,j,k,count,is_n,*is_idx,*rows; 1063 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 1064 PetscScalar zero = 0.0; 1065 MatScalar *aa; 1066 1067 PetscFunctionBegin; 1068 /* Make a copy of the IS and sort it */ 1069 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 1070 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1071 1072 /* allocate memory for rows,sizes */ 1073 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 1074 sizes = rows + is_n; 1075 1076 /* copy IS values to rows, and sort them */ 1077 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1078 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1079 if (baij->keepzeroedrows) { 1080 for (i=0; i<is_n; i++) { sizes[i] = 1; } 1081 bs_max = is_n; 1082 } else { 1083 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1084 } 1085 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 1086 1087 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1088 row = rows[j]; 1089 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 1090 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1091 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1092 if (sizes[i] == bs && !baij->keepzeroedrows) { 1093 if (diag) { 1094 if (baij->ilen[row/bs] > 0) { 1095 baij->ilen[row/bs] = 1; 1096 baij->j[baij->i[row/bs]] = row/bs; 1097 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1098 } 1099 /* Now insert all the diagonal values for this bs */ 1100 for (k=0; k<bs; k++) { 1101 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 1102 } 1103 } else { /* (!diag) */ 1104 baij->ilen[row/bs] = 0; 1105 } /* end (!diag) */ 1106 } else { /* (sizes[i] != bs) */ 1107 #if defined (PETSC_USE_DEBUG) 1108 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 1109 #endif 1110 for (k=0; k<count; k++) { 1111 aa[0] = zero; 1112 aa += bs; 1113 } 1114 if (diag) { 1115 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1116 } 1117 } 1118 } 1119 1120 ierr = PetscFree(rows);CHKERRQ(ierr); 1121 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1122 PetscFunctionReturn(0); 1123 } 1124 1125 #undef __FUNCT__ 1126 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1127 int MatSetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 1128 { 1129 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1130 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1131 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1132 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1133 int ridx,cidx,bs2=a->bs2,ierr; 1134 PetscTruth roworiented=a->roworiented; 1135 MatScalar *ap,value,*aa=a->a,*bap; 1136 1137 PetscFunctionBegin; 1138 for (k=0; k<m; k++) { /* loop over added rows */ 1139 row = im[k]; brow = row/bs; 1140 if (row < 0) continue; 1141 #if defined(PETSC_USE_BOPT_g) 1142 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1); 1143 #endif 1144 rp = aj + ai[brow]; 1145 ap = aa + bs2*ai[brow]; 1146 rmax = imax[brow]; 1147 nrow = ailen[brow]; 1148 low = 0; 1149 for (l=0; l<n; l++) { /* loop over added columns */ 1150 if (in[l] < 0) continue; 1151 #if defined(PETSC_USE_BOPT_g) 1152 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1); 1153 #endif 1154 col = in[l]; bcol = col/bs; 1155 ridx = row % bs; cidx = col % bs; 1156 if (roworiented) { 1157 value = v[l + k*n]; 1158 } else { 1159 value = v[k + l*m]; 1160 } 1161 if (!sorted) low = 0; high = nrow; 1162 while (high-low > 7) { 1163 t = (low+high)/2; 1164 if (rp[t] > bcol) high = t; 1165 else low = t; 1166 } 1167 for (i=low; i<high; i++) { 1168 if (rp[i] > bcol) break; 1169 if (rp[i] == bcol) { 1170 bap = ap + bs2*i + bs*cidx + ridx; 1171 if (is == ADD_VALUES) *bap += value; 1172 else *bap = value; 1173 goto noinsert1; 1174 } 1175 } 1176 if (nonew == 1) goto noinsert1; 1177 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 1178 if (nrow >= rmax) { 1179 /* there is no extra room in row, therefore enlarge */ 1180 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1181 MatScalar *new_a; 1182 1183 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 1184 1185 /* Malloc new storage space */ 1186 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1187 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1188 new_j = (int*)(new_a + bs2*new_nz); 1189 new_i = new_j + new_nz; 1190 1191 /* copy over old data into new slots */ 1192 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1193 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1194 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1195 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1196 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1197 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1198 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1199 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1200 /* free up old matrix storage */ 1201 ierr = PetscFree(a->a);CHKERRQ(ierr); 1202 if (!a->singlemalloc) { 1203 ierr = PetscFree(a->i);CHKERRQ(ierr); 1204 ierr = PetscFree(a->j);CHKERRQ(ierr); 1205 } 1206 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1207 a->singlemalloc = PETSC_TRUE; 1208 1209 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1210 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1211 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1212 a->maxnz += bs2*CHUNKSIZE; 1213 a->reallocs++; 1214 a->nz++; 1215 } 1216 N = nrow++ - 1; 1217 /* shift up all the later entries in this row */ 1218 for (ii=N; ii>=i; ii--) { 1219 rp[ii+1] = rp[ii]; 1220 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1221 } 1222 if (N>=i) { 1223 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1224 } 1225 rp[i] = bcol; 1226 ap[bs2*i + bs*cidx + ridx] = value; 1227 noinsert1:; 1228 low = i; 1229 } 1230 ailen[brow] = nrow; 1231 } 1232 PetscFunctionReturn(0); 1233 } 1234 1235 1236 #undef __FUNCT__ 1237 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1238 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info) 1239 { 1240 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1241 Mat outA; 1242 int ierr; 1243 PetscTruth row_identity,col_identity; 1244 1245 PetscFunctionBegin; 1246 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1247 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1248 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1249 if (!row_identity || !col_identity) { 1250 SETERRQ(1,"Row and column permutations must be identity for in-place ILU"); 1251 } 1252 1253 outA = inA; 1254 inA->factor = FACTOR_LU; 1255 1256 if (!a->diag) { 1257 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1258 } 1259 1260 a->row = row; 1261 a->col = col; 1262 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1263 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1264 1265 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1266 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1267 PetscLogObjectParent(inA,a->icol); 1268 1269 /* 1270 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1271 for ILU(0) factorization with natural ordering 1272 */ 1273 if (a->bs < 8) { 1274 ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1275 } else { 1276 if (!a->solve_work) { 1277 ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1278 PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar)); 1279 } 1280 } 1281 1282 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1283 1284 PetscFunctionReturn(0); 1285 } 1286 #undef __FUNCT__ 1287 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1288 int MatPrintHelp_SeqBAIJ(Mat A) 1289 { 1290 static PetscTruth called = PETSC_FALSE; 1291 MPI_Comm comm = A->comm; 1292 int ierr; 1293 1294 PetscFunctionBegin; 1295 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1296 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1297 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1298 PetscFunctionReturn(0); 1299 } 1300 1301 EXTERN_C_BEGIN 1302 #undef __FUNCT__ 1303 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1304 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1305 { 1306 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1307 int i,nz,nbs; 1308 1309 PetscFunctionBegin; 1310 nz = baij->maxnz/baij->bs2; 1311 nbs = baij->nbs; 1312 for (i=0; i<nz; i++) { 1313 baij->j[i] = indices[i]; 1314 } 1315 baij->nz = nz; 1316 for (i=0; i<nbs; i++) { 1317 baij->ilen[i] = baij->imax[i]; 1318 } 1319 1320 PetscFunctionReturn(0); 1321 } 1322 EXTERN_C_END 1323 1324 #undef __FUNCT__ 1325 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1326 /*@ 1327 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1328 in the matrix. 1329 1330 Input Parameters: 1331 + mat - the SeqBAIJ matrix 1332 - indices - the column indices 1333 1334 Level: advanced 1335 1336 Notes: 1337 This can be called if you have precomputed the nonzero structure of the 1338 matrix and want to provide it to the matrix object to improve the performance 1339 of the MatSetValues() operation. 1340 1341 You MUST have set the correct numbers of nonzeros per row in the call to 1342 MatCreateSeqBAIJ(). 1343 1344 MUST be called before any calls to MatSetValues(); 1345 1346 @*/ 1347 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1348 { 1349 int ierr,(*f)(Mat,int *); 1350 1351 PetscFunctionBegin; 1352 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1353 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1354 if (f) { 1355 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1356 } else { 1357 SETERRQ(1,"Wrong type of matrix to set column indices"); 1358 } 1359 PetscFunctionReturn(0); 1360 } 1361 1362 #undef __FUNCT__ 1363 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1364 int MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1365 { 1366 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1367 int ierr,i,j,n,row,bs,*ai,*aj,mbs; 1368 PetscReal atmp; 1369 PetscScalar *x,zero = 0.0; 1370 MatScalar *aa; 1371 int ncols,brow,krow,kcol; 1372 1373 PetscFunctionBegin; 1374 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1375 bs = a->bs; 1376 aa = a->a; 1377 ai = a->i; 1378 aj = a->j; 1379 mbs = a->mbs; 1380 1381 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1382 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1383 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1384 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1385 for (i=0; i<mbs; i++) { 1386 ncols = ai[1] - ai[0]; ai++; 1387 brow = bs*i; 1388 for (j=0; j<ncols; j++){ 1389 /* bcol = bs*(*aj); */ 1390 for (kcol=0; kcol<bs; kcol++){ 1391 for (krow=0; krow<bs; krow++){ 1392 atmp = PetscAbsScalar(*aa); aa++; 1393 row = brow + krow; /* row index */ 1394 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1395 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1396 } 1397 } 1398 aj++; 1399 } 1400 } 1401 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1402 PetscFunctionReturn(0); 1403 } 1404 1405 #undef __FUNCT__ 1406 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1407 int MatSetUpPreallocation_SeqBAIJ(Mat A) 1408 { 1409 int ierr; 1410 1411 PetscFunctionBegin; 1412 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1413 PetscFunctionReturn(0); 1414 } 1415 1416 #undef __FUNCT__ 1417 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1418 int MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1419 { 1420 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1421 PetscFunctionBegin; 1422 *array = a->a; 1423 PetscFunctionReturn(0); 1424 } 1425 1426 #undef __FUNCT__ 1427 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1428 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 1429 { 1430 PetscFunctionBegin; 1431 PetscFunctionReturn(0); 1432 } 1433 1434 #include "petscblaslapack.h" 1435 #undef __FUNCT__ 1436 #define __FUNCT__ "MatAXPY_SeqBAIJ" 1437 int MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1438 { 1439 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 1440 int ierr,one=1,i,bs=y->bs,j,bs2; 1441 1442 PetscFunctionBegin; 1443 if (str == SAME_NONZERO_PATTERN) { 1444 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1445 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1446 if (y->xtoy && y->XtoY != X) { 1447 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1448 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1449 } 1450 if (!y->xtoy) { /* get xtoy */ 1451 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1452 y->XtoY = X; 1453 } 1454 bs2 = bs*bs; 1455 for (i=0; i<x->nz; i++) { 1456 j = 0; 1457 while (j < bs2){ 1458 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1459 j++; 1460 } 1461 } 1462 PetscLogInfo(0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)); 1463 } else { 1464 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1465 } 1466 PetscFunctionReturn(0); 1467 } 1468 1469 /* -------------------------------------------------------------------*/ 1470 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1471 MatGetRow_SeqBAIJ, 1472 MatRestoreRow_SeqBAIJ, 1473 MatMult_SeqBAIJ_N, 1474 /* 4*/ MatMultAdd_SeqBAIJ_N, 1475 MatMultTranspose_SeqBAIJ, 1476 MatMultTransposeAdd_SeqBAIJ, 1477 MatSolve_SeqBAIJ_N, 1478 0, 1479 0, 1480 /*10*/ 0, 1481 MatLUFactor_SeqBAIJ, 1482 0, 1483 0, 1484 MatTranspose_SeqBAIJ, 1485 /*15*/ MatGetInfo_SeqBAIJ, 1486 MatEqual_SeqBAIJ, 1487 MatGetDiagonal_SeqBAIJ, 1488 MatDiagonalScale_SeqBAIJ, 1489 MatNorm_SeqBAIJ, 1490 /*20*/ 0, 1491 MatAssemblyEnd_SeqBAIJ, 1492 0, 1493 MatSetOption_SeqBAIJ, 1494 MatZeroEntries_SeqBAIJ, 1495 /*25*/ MatZeroRows_SeqBAIJ, 1496 MatLUFactorSymbolic_SeqBAIJ, 1497 MatLUFactorNumeric_SeqBAIJ_N, 1498 0, 1499 0, 1500 /*30*/ MatSetUpPreallocation_SeqBAIJ, 1501 MatILUFactorSymbolic_SeqBAIJ, 1502 0, 1503 MatGetArray_SeqBAIJ, 1504 MatRestoreArray_SeqBAIJ, 1505 /*35*/ MatDuplicate_SeqBAIJ, 1506 0, 1507 0, 1508 MatILUFactor_SeqBAIJ, 1509 0, 1510 /*40*/ MatAXPY_SeqBAIJ, 1511 MatGetSubMatrices_SeqBAIJ, 1512 MatIncreaseOverlap_SeqBAIJ, 1513 MatGetValues_SeqBAIJ, 1514 0, 1515 /*45*/ MatPrintHelp_SeqBAIJ, 1516 MatScale_SeqBAIJ, 1517 0, 1518 0, 1519 0, 1520 /*50*/ MatGetBlockSize_SeqBAIJ, 1521 MatGetRowIJ_SeqBAIJ, 1522 MatRestoreRowIJ_SeqBAIJ, 1523 0, 1524 0, 1525 /*55*/ 0, 1526 0, 1527 0, 1528 0, 1529 MatSetValuesBlocked_SeqBAIJ, 1530 /*60*/ MatGetSubMatrix_SeqBAIJ, 1531 MatDestroy_SeqBAIJ, 1532 MatView_SeqBAIJ, 1533 MatGetPetscMaps_Petsc, 1534 0, 1535 /*65*/ 0, 1536 0, 1537 0, 1538 0, 1539 0, 1540 /*70*/ MatGetRowMax_SeqBAIJ, 1541 MatConvert_Basic, 1542 0, 1543 0, 1544 0, 1545 /*75*/ 0, 1546 0, 1547 0, 1548 0, 1549 0, 1550 /*80*/ 0, 1551 0, 1552 0, 1553 0, 1554 0, 1555 /*85*/ MatLoad_SeqBAIJ 1556 }; 1557 1558 EXTERN_C_BEGIN 1559 #undef __FUNCT__ 1560 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 1561 int MatStoreValues_SeqBAIJ(Mat mat) 1562 { 1563 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1564 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1565 int ierr; 1566 1567 PetscFunctionBegin; 1568 if (aij->nonew != 1) { 1569 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1570 } 1571 1572 /* allocate space for values if not already there */ 1573 if (!aij->saved_values) { 1574 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1575 } 1576 1577 /* copy values over */ 1578 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1579 PetscFunctionReturn(0); 1580 } 1581 EXTERN_C_END 1582 1583 EXTERN_C_BEGIN 1584 #undef __FUNCT__ 1585 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 1586 int MatRetrieveValues_SeqBAIJ(Mat mat) 1587 { 1588 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1589 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1590 1591 PetscFunctionBegin; 1592 if (aij->nonew != 1) { 1593 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1594 } 1595 if (!aij->saved_values) { 1596 SETERRQ(1,"Must call MatStoreValues(A);first"); 1597 } 1598 1599 /* copy values over */ 1600 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1601 PetscFunctionReturn(0); 1602 } 1603 EXTERN_C_END 1604 1605 EXTERN_C_BEGIN 1606 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*); 1607 EXTERN_C_END 1608 1609 EXTERN_C_BEGIN 1610 #undef __FUNCT__ 1611 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 1612 int MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,int bs,int nz,int *nnz) 1613 { 1614 Mat_SeqBAIJ *b; 1615 int i,len,ierr,mbs,nbs,bs2,newbs = bs; 1616 PetscTruth flg; 1617 1618 PetscFunctionBegin; 1619 1620 B->preallocated = PETSC_TRUE; 1621 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 1622 if (nnz && newbs != bs) { 1623 SETERRQ(1,"Cannot change blocksize from command line if setting nnz"); 1624 } 1625 bs = newbs; 1626 1627 mbs = B->m/bs; 1628 nbs = B->n/bs; 1629 bs2 = bs*bs; 1630 1631 if (mbs*bs!=B->m || nbs*bs!=B->n) { 1632 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 1633 } 1634 1635 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1636 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1637 if (nnz) { 1638 for (i=0; i<mbs; i++) { 1639 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1640 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 1641 } 1642 } 1643 1644 b = (Mat_SeqBAIJ*)B->data; 1645 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1646 B->ops->solve = MatSolve_SeqBAIJ_Update; 1647 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 1648 if (!flg) { 1649 switch (bs) { 1650 case 1: 1651 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1652 B->ops->mult = MatMult_SeqBAIJ_1; 1653 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 1654 break; 1655 case 2: 1656 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1657 B->ops->mult = MatMult_SeqBAIJ_2; 1658 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 1659 break; 1660 case 3: 1661 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1662 B->ops->mult = MatMult_SeqBAIJ_3; 1663 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 1664 break; 1665 case 4: 1666 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1667 B->ops->mult = MatMult_SeqBAIJ_4; 1668 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 1669 break; 1670 case 5: 1671 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1672 B->ops->mult = MatMult_SeqBAIJ_5; 1673 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 1674 break; 1675 case 6: 1676 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 1677 B->ops->mult = MatMult_SeqBAIJ_6; 1678 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 1679 break; 1680 case 7: 1681 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 1682 B->ops->mult = MatMult_SeqBAIJ_7; 1683 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 1684 break; 1685 default: 1686 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 1687 B->ops->mult = MatMult_SeqBAIJ_N; 1688 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 1689 break; 1690 } 1691 } 1692 b->bs = bs; 1693 b->mbs = mbs; 1694 b->nbs = nbs; 1695 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1696 if (!nnz) { 1697 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1698 else if (nz <= 0) nz = 1; 1699 for (i=0; i<mbs; i++) b->imax[i] = nz; 1700 nz = nz*mbs; 1701 } else { 1702 nz = 0; 1703 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1704 } 1705 1706 /* allocate the matrix space */ 1707 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1708 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1709 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1710 b->j = (int*)(b->a + nz*bs2); 1711 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 1712 b->i = b->j + nz; 1713 b->singlemalloc = PETSC_TRUE; 1714 1715 b->i[0] = 0; 1716 for (i=1; i<mbs+1; i++) { 1717 b->i[i] = b->i[i-1] + b->imax[i-1]; 1718 } 1719 1720 /* b->ilen will count nonzeros in each block row so far. */ 1721 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1722 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1723 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1724 1725 b->bs = bs; 1726 b->bs2 = bs2; 1727 b->mbs = mbs; 1728 b->nz = 0; 1729 b->maxnz = nz*bs2; 1730 B->info.nz_unneeded = (PetscReal)b->maxnz; 1731 PetscFunctionReturn(0); 1732 } 1733 EXTERN_C_END 1734 1735 /*MC 1736 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 1737 block sparse compressed row format. 1738 1739 Options Database Keys: 1740 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 1741 1742 Level: beginner 1743 1744 .seealso: MatCreateSeqBAIJ 1745 M*/ 1746 1747 EXTERN_C_BEGIN 1748 #undef __FUNCT__ 1749 #define __FUNCT__ "MatCreate_SeqBAIJ" 1750 int MatCreate_SeqBAIJ(Mat B) 1751 { 1752 int ierr,size; 1753 Mat_SeqBAIJ *b; 1754 1755 PetscFunctionBegin; 1756 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1757 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1758 1759 B->m = B->M = PetscMax(B->m,B->M); 1760 B->n = B->N = PetscMax(B->n,B->N); 1761 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 1762 B->data = (void*)b; 1763 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1764 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1765 B->factor = 0; 1766 B->lupivotthreshold = 1.0; 1767 B->mapping = 0; 1768 b->row = 0; 1769 b->col = 0; 1770 b->icol = 0; 1771 b->reallocs = 0; 1772 b->saved_values = 0; 1773 #if defined(PETSC_USE_MAT_SINGLE) 1774 b->setvalueslen = 0; 1775 b->setvaluescopy = PETSC_NULL; 1776 #endif 1777 1778 ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 1779 ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 1780 1781 b->sorted = PETSC_FALSE; 1782 b->roworiented = PETSC_TRUE; 1783 b->nonew = 0; 1784 b->diag = 0; 1785 b->solve_work = 0; 1786 b->mult_work = 0; 1787 B->spptr = 0; 1788 B->info.nz_unneeded = (PetscReal)b->maxnz; 1789 b->keepzeroedrows = PETSC_FALSE; 1790 b->xtoy = 0; 1791 b->XtoY = 0; 1792 1793 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1794 "MatStoreValues_SeqBAIJ", 1795 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1796 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1797 "MatRetrieveValues_SeqBAIJ", 1798 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1799 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1800 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1801 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1802 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 1803 "MatConvert_SeqBAIJ_SeqAIJ", 1804 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 1805 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C", 1806 "MatSeqBAIJSetPreallocation_SeqBAIJ", 1807 MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 1808 PetscFunctionReturn(0); 1809 } 1810 EXTERN_C_END 1811 1812 #undef __FUNCT__ 1813 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 1814 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1815 { 1816 Mat C; 1817 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 1818 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1819 1820 PetscFunctionBegin; 1821 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1822 1823 *B = 0; 1824 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1825 ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr); 1826 c = (Mat_SeqBAIJ*)C->data; 1827 1828 c->bs = a->bs; 1829 c->bs2 = a->bs2; 1830 c->mbs = a->mbs; 1831 c->nbs = a->nbs; 1832 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1833 1834 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1835 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1836 for (i=0; i<mbs; i++) { 1837 c->imax[i] = a->imax[i]; 1838 c->ilen[i] = a->ilen[i]; 1839 } 1840 1841 /* allocate the matrix space */ 1842 c->singlemalloc = PETSC_TRUE; 1843 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1844 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1845 c->j = (int*)(c->a + nz*bs2); 1846 c->i = c->j + nz; 1847 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1848 if (mbs > 0) { 1849 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1850 if (cpvalues == MAT_COPY_VALUES) { 1851 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1852 } else { 1853 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1854 } 1855 } 1856 1857 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1858 c->sorted = a->sorted; 1859 c->roworiented = a->roworiented; 1860 c->nonew = a->nonew; 1861 1862 if (a->diag) { 1863 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1864 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1865 for (i=0; i<mbs; i++) { 1866 c->diag[i] = a->diag[i]; 1867 } 1868 } else c->diag = 0; 1869 c->nz = a->nz; 1870 c->maxnz = a->maxnz; 1871 c->solve_work = 0; 1872 C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1873 c->mult_work = 0; 1874 C->preallocated = PETSC_TRUE; 1875 C->assembled = PETSC_TRUE; 1876 *B = C; 1877 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1878 PetscFunctionReturn(0); 1879 } 1880 1881 #undef __FUNCT__ 1882 #define __FUNCT__ "MatLoad_SeqBAIJ" 1883 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A) 1884 { 1885 Mat_SeqBAIJ *a; 1886 Mat B; 1887 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1888 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1889 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1890 int *masked,nmask,tmp,bs2,ishift; 1891 PetscScalar *aa; 1892 MPI_Comm comm = ((PetscObject)viewer)->comm; 1893 1894 PetscFunctionBegin; 1895 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1896 bs2 = bs*bs; 1897 1898 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1899 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1900 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1901 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1902 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1903 M = header[1]; N = header[2]; nz = header[3]; 1904 1905 if (header[3] < 0) { 1906 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 1907 } 1908 1909 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1910 1911 /* 1912 This code adds extra rows to make sure the number of rows is 1913 divisible by the blocksize 1914 */ 1915 mbs = M/bs; 1916 extra_rows = bs - M + bs*(mbs); 1917 if (extra_rows == bs) extra_rows = 0; 1918 else mbs++; 1919 if (extra_rows) { 1920 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1921 } 1922 1923 /* read in row lengths */ 1924 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1925 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1926 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1927 1928 /* read in column indices */ 1929 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1930 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1931 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1932 1933 /* loop over row lengths determining block row lengths */ 1934 ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 1935 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1936 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1937 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1938 masked = mask + mbs; 1939 rowcount = 0; nzcount = 0; 1940 for (i=0; i<mbs; i++) { 1941 nmask = 0; 1942 for (j=0; j<bs; j++) { 1943 kmax = rowlengths[rowcount]; 1944 for (k=0; k<kmax; k++) { 1945 tmp = jj[nzcount++]/bs; 1946 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1947 } 1948 rowcount++; 1949 } 1950 browlengths[i] += nmask; 1951 /* zero out the mask elements we set */ 1952 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1953 } 1954 1955 /* create our matrix */ 1956 ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B); 1957 ierr = MatSetType(B,type);CHKERRQ(ierr); 1958 ierr = MatSeqBAIJSetPreallocation(B,bs,0,browlengths);CHKERRQ(ierr); 1959 a = (Mat_SeqBAIJ*)B->data; 1960 1961 /* set matrix "i" values */ 1962 a->i[0] = 0; 1963 for (i=1; i<= mbs; i++) { 1964 a->i[i] = a->i[i-1] + browlengths[i-1]; 1965 a->ilen[i-1] = browlengths[i-1]; 1966 } 1967 a->nz = 0; 1968 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 1969 1970 /* read in nonzero values */ 1971 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1972 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1973 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1974 1975 /* set "a" and "j" values into matrix */ 1976 nzcount = 0; jcount = 0; 1977 for (i=0; i<mbs; i++) { 1978 nzcountb = nzcount; 1979 nmask = 0; 1980 for (j=0; j<bs; j++) { 1981 kmax = rowlengths[i*bs+j]; 1982 for (k=0; k<kmax; k++) { 1983 tmp = jj[nzcount++]/bs; 1984 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1985 } 1986 } 1987 /* sort the masked values */ 1988 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1989 1990 /* set "j" values into matrix */ 1991 maskcount = 1; 1992 for (j=0; j<nmask; j++) { 1993 a->j[jcount++] = masked[j]; 1994 mask[masked[j]] = maskcount++; 1995 } 1996 /* set "a" values into matrix */ 1997 ishift = bs2*a->i[i]; 1998 for (j=0; j<bs; j++) { 1999 kmax = rowlengths[i*bs+j]; 2000 for (k=0; k<kmax; k++) { 2001 tmp = jj[nzcountb]/bs ; 2002 block = mask[tmp] - 1; 2003 point = jj[nzcountb] - bs*tmp; 2004 idx = ishift + bs2*block + j + bs*point; 2005 a->a[idx] = (MatScalar)aa[nzcountb++]; 2006 } 2007 } 2008 /* zero out the mask elements we set */ 2009 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 2010 } 2011 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 2012 2013 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2014 ierr = PetscFree(browlengths);CHKERRQ(ierr); 2015 ierr = PetscFree(aa);CHKERRQ(ierr); 2016 ierr = PetscFree(jj);CHKERRQ(ierr); 2017 ierr = PetscFree(mask);CHKERRQ(ierr); 2018 2019 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2020 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2021 ierr = MatView_Private(B);CHKERRQ(ierr); 2022 2023 *A = B; 2024 PetscFunctionReturn(0); 2025 } 2026 2027 #undef __FUNCT__ 2028 #define __FUNCT__ "MatCreateSeqBAIJ" 2029 /*@C 2030 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2031 compressed row) format. For good matrix assembly performance the 2032 user should preallocate the matrix storage by setting the parameter nz 2033 (or the array nnz). By setting these parameters accurately, performance 2034 during matrix assembly can be increased by more than a factor of 50. 2035 2036 Collective on MPI_Comm 2037 2038 Input Parameters: 2039 + comm - MPI communicator, set to PETSC_COMM_SELF 2040 . bs - size of block 2041 . m - number of rows 2042 . n - number of columns 2043 . nz - number of nonzero blocks per block row (same for all rows) 2044 - nnz - array containing the number of nonzero blocks in the various block rows 2045 (possibly different for each block row) or PETSC_NULL 2046 2047 Output Parameter: 2048 . A - the matrix 2049 2050 Options Database Keys: 2051 . -mat_no_unroll - uses code that does not unroll the loops in the 2052 block calculations (much slower) 2053 . -mat_block_size - size of the blocks to use 2054 2055 Level: intermediate 2056 2057 Notes: 2058 A nonzero block is any block that as 1 or more nonzeros in it 2059 2060 The block AIJ format is fully compatible with standard Fortran 77 2061 storage. That is, the stored row and column indices can begin at 2062 either one (as in Fortran) or zero. See the users' manual for details. 2063 2064 Specify the preallocated storage with either nz or nnz (not both). 2065 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2066 allocation. For additional details, see the users manual chapter on 2067 matrices. 2068 2069 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2070 @*/ 2071 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A) 2072 { 2073 int ierr; 2074 2075 PetscFunctionBegin; 2076 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 2077 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 2078 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 2079 PetscFunctionReturn(0); 2080 } 2081 2082 #undef __FUNCT__ 2083 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 2084 /*@C 2085 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 2086 per row in the matrix. For good matrix assembly performance the 2087 user should preallocate the matrix storage by setting the parameter nz 2088 (or the array nnz). By setting these parameters accurately, performance 2089 during matrix assembly can be increased by more than a factor of 50. 2090 2091 Collective on MPI_Comm 2092 2093 Input Parameters: 2094 + A - the matrix 2095 . bs - size of block 2096 . nz - number of block nonzeros per block row (same for all rows) 2097 - nnz - array containing the number of block nonzeros in the various block rows 2098 (possibly different for each block row) or PETSC_NULL 2099 2100 Options Database Keys: 2101 . -mat_no_unroll - uses code that does not unroll the loops in the 2102 block calculations (much slower) 2103 . -mat_block_size - size of the blocks to use 2104 2105 Level: intermediate 2106 2107 Notes: 2108 The block AIJ format is fully compatible with standard Fortran 77 2109 storage. That is, the stored row and column indices can begin at 2110 either one (as in Fortran) or zero. See the users' manual for details. 2111 2112 Specify the preallocated storage with either nz or nnz (not both). 2113 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2114 allocation. For additional details, see the users manual chapter on 2115 matrices. 2116 2117 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 2118 @*/ 2119 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) 2120 { 2121 int ierr,(*f)(Mat,int,int,const int[]); 2122 2123 PetscFunctionBegin; 2124 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 2125 if (f) { 2126 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 2127 } 2128 PetscFunctionReturn(0); 2129 } 2130