1 2 #ifndef lint 3 static char vcid[] = "$Id: baij.c,v 1.62 1996/08/01 22:21:46 balay Exp bsmith $"; 4 #endif 5 6 /* 7 Defines the basic matrix operations for the BAIJ (compressed row) 8 matrix storage format. 9 */ 10 #include "baij.h" 11 #include "src/vec/vecimpl.h" 12 #include "src/inline/spops.h" 13 #include "petsc.h" 14 15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 16 17 static int MatGetReordering_SeqBAIJ(Mat A,MatReordering type,IS *rperm,IS *cperm) 18 { 19 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 20 int ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift; 21 22 /* 23 this is tacky: In the future when we have written special factorization 24 and solve routines for the identity permutation we should use a 25 stride index set instead of the general one. 26 */ 27 if (type == ORDER_NATURAL) { 28 idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx); 29 for ( i=0; i<n; i++ ) idx[i] = i; 30 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr); 31 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr); 32 PetscFree(idx); 33 ISSetPermutation(*rperm); 34 ISSetPermutation(*cperm); 35 ISSetIdentity(*rperm); 36 ISSetIdentity(*cperm); 37 return 0; 38 } 39 40 MatReorderingRegisterAll(); 41 ishift = 0; 42 oshift = -MatReorderingIndexShift[(int)type]; 43 if (MatReorderingRequiresSymmetric[(int)type]) { 44 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,ishift,oshift,&ia,&ja);CHKERRQ(ierr); 45 ierr = MatGetReordering_IJ(n,ia,ja,type,rperm,cperm); CHKERRQ(ierr); 46 PetscFree(ia); PetscFree(ja); 47 } else { 48 if (ishift == oshift) { 49 ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 50 } 51 else if (ishift == -1) { 52 /* temporarily subtract 1 from i and j indices */ 53 int nz = a->i[n] - 1; 54 for ( i=0; i<nz; i++ ) a->j[i]--; 55 for ( i=0; i<n+1; i++ ) a->i[i]--; 56 ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 57 for ( i=0; i<nz; i++ ) a->j[i]++; 58 for ( i=0; i<n+1; i++ ) a->i[i]++; 59 } else { 60 /* temporarily add 1 to i and j indices */ 61 int nz = a->i[n] - 1; 62 for ( i=0; i<nz; i++ ) a->j[i]++; 63 for ( i=0; i<n+1; i++ ) a->i[i]++; 64 ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 65 for ( i=0; i<nz; i++ ) a->j[i]--; 66 for ( i=0; i<n+1; i++ ) a->i[i]--; 67 } 68 } 69 return 0; 70 } 71 72 /* 73 Adds diagonal pointers to sparse matrix structure. 74 */ 75 76 int MatMarkDiag_SeqBAIJ(Mat A) 77 { 78 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 79 int i,j, *diag, m = a->mbs; 80 81 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 82 PLogObjectMemory(A,(m+1)*sizeof(int)); 83 for ( i=0; i<m; i++ ) { 84 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 85 if (a->j[j] == i) { 86 diag[i] = j; 87 break; 88 } 89 } 90 } 91 a->diag = diag; 92 return 0; 93 } 94 95 #include "draw.h" 96 #include "pinclude/pviewer.h" 97 #include "sys.h" 98 99 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 100 { 101 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 102 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 103 Scalar *aa; 104 105 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 106 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 107 col_lens[0] = MAT_COOKIE; 108 col_lens[1] = a->m; 109 col_lens[2] = a->n; 110 col_lens[3] = a->nz*bs2; 111 112 /* store lengths of each row and write (including header) to file */ 113 count = 0; 114 for ( i=0; i<a->mbs; i++ ) { 115 for ( j=0; j<bs; j++ ) { 116 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 117 } 118 } 119 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 120 PetscFree(col_lens); 121 122 /* store column indices (zero start index) */ 123 jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj); 124 count = 0; 125 for ( i=0; i<a->mbs; i++ ) { 126 for ( j=0; j<bs; j++ ) { 127 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 128 for ( l=0; l<bs; l++ ) { 129 jj[count++] = bs*a->j[k] + l; 130 } 131 } 132 } 133 } 134 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr); 135 PetscFree(jj); 136 137 /* store nonzero values */ 138 aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa); 139 count = 0; 140 for ( i=0; i<a->mbs; i++ ) { 141 for ( j=0; j<bs; j++ ) { 142 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 143 for ( l=0; l<bs; l++ ) { 144 aa[count++] = a->a[bs2*k + l*bs + j]; 145 } 146 } 147 } 148 } 149 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 150 PetscFree(aa); 151 return 0; 152 } 153 154 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 155 { 156 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 157 int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 158 FILE *fd; 159 char *outputname; 160 161 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 162 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 163 ierr = ViewerGetFormat(viewer,&format); 164 if (format == ASCII_FORMAT_INFO || format == ASCII_FORMAT_INFO_DETAILED) { 165 fprintf(fd," block size is %d\n",bs); 166 } 167 else if (format == ASCII_FORMAT_MATLAB) { 168 SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported"); 169 } 170 else if (format == ASCII_FORMAT_COMMON) { 171 for ( i=0; i<a->mbs; i++ ) { 172 for ( j=0; j<bs; j++ ) { 173 fprintf(fd,"row %d:",i*bs+j); 174 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 175 for ( l=0; l<bs; l++ ) { 176 #if defined(PETSC_COMPLEX) 177 if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 178 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 179 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 180 else if (real(a->a[bs2*k + l*bs + j]) != 0.0) 181 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 182 #else 183 if (a->a[bs2*k + l*bs + j] != 0.0) 184 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 185 #endif 186 } 187 } 188 fprintf(fd,"\n"); 189 } 190 } 191 } 192 else { 193 for ( i=0; i<a->mbs; i++ ) { 194 for ( j=0; j<bs; j++ ) { 195 fprintf(fd,"row %d:",i*bs+j); 196 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 197 for ( l=0; l<bs; l++ ) { 198 #if defined(PETSC_COMPLEX) 199 if (imag(a->a[bs2*k + l*bs + j]) != 0.0) { 200 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 201 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 202 } 203 else { 204 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 205 } 206 #else 207 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 208 #endif 209 } 210 } 211 fprintf(fd,"\n"); 212 } 213 } 214 } 215 fflush(fd); 216 return 0; 217 } 218 219 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer) 220 { 221 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 222 int row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2; 223 double xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r; 224 Scalar *aa; 225 Draw draw; 226 DrawButton button; 227 PetscTruth isnull; 228 229 ViewerDrawGetDraw(viewer,&draw); 230 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 231 232 xr = a->n; yr = a->m; h = yr/10.0; w = xr/10.0; 233 xr += w; yr += h; xl = -w; yl = -h; 234 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 235 /* loop over matrix elements drawing boxes */ 236 color = DRAW_BLUE; 237 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 238 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 239 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 240 x_l = a->j[j]*bs; x_r = x_l + 1.0; 241 aa = a->a + j*bs2; 242 for ( k=0; k<bs; k++ ) { 243 for ( l=0; l<bs; l++ ) { 244 if (PetscReal(*aa++) >= 0.) continue; 245 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 246 } 247 } 248 } 249 } 250 color = DRAW_CYAN; 251 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 252 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 253 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 254 x_l = a->j[j]*bs; x_r = x_l + 1.0; 255 aa = a->a + j*bs2; 256 for ( k=0; k<bs; k++ ) { 257 for ( l=0; l<bs; l++ ) { 258 if (PetscReal(*aa++) != 0.) continue; 259 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 260 } 261 } 262 } 263 } 264 265 color = DRAW_RED; 266 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 267 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 268 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 269 x_l = a->j[j]*bs; x_r = x_l + 1.0; 270 aa = a->a + j*bs2; 271 for ( k=0; k<bs; k++ ) { 272 for ( l=0; l<bs; l++ ) { 273 if (PetscReal(*aa++) <= 0.) continue; 274 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 275 } 276 } 277 } 278 } 279 280 DrawFlush(draw); 281 DrawGetPause(draw,&pause); 282 if (pause >= 0) { PetscSleep(pause); return 0;} 283 284 /* allow the matrix to zoom or shrink */ 285 ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 286 while (button != BUTTON_RIGHT) { 287 DrawClear(draw); 288 if (button == BUTTON_LEFT) scale = .5; 289 else if (button == BUTTON_CENTER) scale = 2.; 290 xl = scale*(xl + w - xc) + xc - w*scale; 291 xr = scale*(xr - w - xc) + xc + w*scale; 292 yl = scale*(yl + h - yc) + yc - h*scale; 293 yr = scale*(yr - h - yc) + yc + h*scale; 294 w *= scale; h *= scale; 295 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 296 color = DRAW_BLUE; 297 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 298 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 299 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 300 x_l = a->j[j]*bs; x_r = x_l + 1.0; 301 aa = a->a + j*bs2; 302 for ( k=0; k<bs; k++ ) { 303 for ( l=0; l<bs; l++ ) { 304 if (PetscReal(*aa++) >= 0.) continue; 305 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 306 } 307 } 308 } 309 } 310 color = DRAW_CYAN; 311 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 312 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 313 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 314 x_l = a->j[j]*bs; x_r = x_l + 1.0; 315 aa = a->a + j*bs2; 316 for ( k=0; k<bs; k++ ) { 317 for ( l=0; l<bs; l++ ) { 318 if (PetscReal(*aa++) != 0.) continue; 319 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 320 } 321 } 322 } 323 } 324 325 color = DRAW_RED; 326 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 327 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 328 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 329 x_l = a->j[j]*bs; x_r = x_l + 1.0; 330 aa = a->a + j*bs2; 331 for ( k=0; k<bs; k++ ) { 332 for ( l=0; l<bs; l++ ) { 333 if (PetscReal(*aa++) <= 0.) continue; 334 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 335 } 336 } 337 } 338 } 339 ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 340 } 341 return 0; 342 } 343 344 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 345 { 346 Mat A = (Mat) obj; 347 ViewerType vtype; 348 int ierr; 349 350 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 351 if (vtype == MATLAB_VIEWER) { 352 SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported"); 353 } 354 else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 355 return MatView_SeqBAIJ_ASCII(A,viewer); 356 } 357 else if (vtype == BINARY_FILE_VIEWER) { 358 return MatView_SeqBAIJ_Binary(A,viewer); 359 } 360 else if (vtype == DRAW_VIEWER) { 361 return MatView_SeqBAIJ_Draw(A,viewer); 362 } 363 return 0; 364 } 365 366 #define CHUNKSIZE 10 367 368 /* This version has row oriented v */ 369 static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 370 { 371 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 372 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 373 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 374 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 375 int ridx,cidx,bs2=a->bs2; 376 Scalar *ap,value,*aa=a->a,*bap; 377 378 for ( k=0; k<m; k++ ) { /* loop over added rows */ 379 row = im[k]; brow = row/bs; 380 if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row"); 381 if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large"); 382 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 383 rmax = imax[brow]; nrow = ailen[brow]; 384 low = 0; 385 for ( l=0; l<n; l++ ) { /* loop over added columns */ 386 if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column"); 387 if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large"); 388 col = in[l]; bcol = col/bs; 389 ridx = row % bs; cidx = col % bs; 390 if (roworiented) { 391 value = *v++; 392 } 393 else { 394 value = v[k + l*m]; 395 } 396 if (!sorted) low = 0; high = nrow; 397 while (high-low > 5) { 398 t = (low+high)/2; 399 if (rp[t] > bcol) high = t; 400 else low = t; 401 } 402 for ( i=low; i<high; i++ ) { 403 if (rp[i] > bcol) break; 404 if (rp[i] == bcol) { 405 bap = ap + bs2*i + bs*cidx + ridx; 406 if (is == ADD_VALUES) *bap += value; 407 else *bap = value; 408 goto noinsert; 409 } 410 } 411 if (nonew) goto noinsert; 412 if (nrow >= rmax) { 413 /* there is no extra room in row, therefore enlarge */ 414 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 415 Scalar *new_a; 416 417 /* malloc new storage space */ 418 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 419 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 420 new_j = (int *) (new_a + bs2*new_nz); 421 new_i = new_j + new_nz; 422 423 /* copy over old data into new slots */ 424 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 425 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 426 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); 427 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 428 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, 429 len*sizeof(int)); 430 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); 431 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 432 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), 433 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); 434 /* free up old matrix storage */ 435 PetscFree(a->a); 436 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 437 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 438 a->singlemalloc = 1; 439 440 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 441 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 442 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 443 a->maxnz += bs2*CHUNKSIZE; 444 a->reallocs++; 445 a->nz++; 446 } 447 N = nrow++ - 1; 448 /* shift up all the later entries in this row */ 449 for ( ii=N; ii>=i; ii-- ) { 450 rp[ii+1] = rp[ii]; 451 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 452 } 453 if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 454 rp[i] = bcol; 455 ap[bs2*i + bs*cidx + ridx] = value; 456 noinsert:; 457 low = i; 458 } 459 ailen[brow] = nrow; 460 } 461 return 0; 462 } 463 464 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 465 { 466 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 467 *m = a->m; *n = a->n; 468 return 0; 469 } 470 471 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 472 { 473 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 474 *m = 0; *n = a->m; 475 return 0; 476 } 477 478 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 479 { 480 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 481 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 482 Scalar *aa,*v_i,*aa_i; 483 484 bs = a->bs; 485 ai = a->i; 486 aj = a->j; 487 aa = a->a; 488 bs2 = a->bs2; 489 490 if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range"); 491 492 bn = row/bs; /* Block number */ 493 bp = row % bs; /* Block Position */ 494 M = ai[bn+1] - ai[bn]; 495 *nz = bs*M; 496 497 if (v) { 498 *v = 0; 499 if (*nz) { 500 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 501 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 502 v_i = *v + i*bs; 503 aa_i = aa + bs2*(ai[bn] + i); 504 for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 505 } 506 } 507 } 508 509 if (idx) { 510 *idx = 0; 511 if (*nz) { 512 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 513 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 514 idx_i = *idx + i*bs; 515 itmp = bs*aj[ai[bn] + i]; 516 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 517 } 518 } 519 } 520 return 0; 521 } 522 523 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 524 { 525 if (idx) {if (*idx) PetscFree(*idx);} 526 if (v) {if (*v) PetscFree(*v);} 527 return 0; 528 } 529 530 static int MatTranspose_SeqBAIJ(Mat A,Mat *B) 531 { 532 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 533 Mat C; 534 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 535 int *rows,*cols,bs2=a->bs2; 536 Scalar *array=a->a; 537 538 if (B==PETSC_NULL && mbs!=nbs) 539 SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place"); 540 col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 541 PetscMemzero(col,(1+nbs)*sizeof(int)); 542 543 for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 544 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 545 PetscFree(col); 546 rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 547 cols = rows + bs; 548 for ( i=0; i<mbs; i++ ) { 549 cols[0] = i*bs; 550 for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 551 len = ai[i+1] - ai[i]; 552 for ( j=0; j<len; j++ ) { 553 rows[0] = (*aj++)*bs; 554 for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 555 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 556 array += bs2; 557 } 558 } 559 PetscFree(rows); 560 561 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 562 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 563 564 if (B != PETSC_NULL) { 565 *B = C; 566 } else { 567 /* This isn't really an in-place transpose */ 568 PetscFree(a->a); 569 if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 570 if (a->diag) PetscFree(a->diag); 571 if (a->ilen) PetscFree(a->ilen); 572 if (a->imax) PetscFree(a->imax); 573 if (a->solve_work) PetscFree(a->solve_work); 574 PetscFree(a); 575 PetscMemcpy(A,C,sizeof(struct _Mat)); 576 PetscHeaderDestroy(C); 577 } 578 return 0; 579 } 580 581 582 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 583 { 584 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 585 int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 586 int m = a->m,*ip, N, *ailen = a->ilen; 587 int mbs = a->mbs, bs2 = a->bs2; 588 Scalar *aa = a->a, *ap; 589 590 if (mode == MAT_FLUSH_ASSEMBLY) return 0; 591 592 for ( i=1; i<mbs; i++ ) { 593 /* move each row back by the amount of empty slots (fshift) before it*/ 594 fshift += imax[i-1] - ailen[i-1]; 595 if (fshift) { 596 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 597 N = ailen[i]; 598 for ( j=0; j<N; j++ ) { 599 ip[j-fshift] = ip[j]; 600 PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 601 } 602 } 603 ai[i] = ai[i-1] + ailen[i-1]; 604 } 605 if (mbs) { 606 fshift += imax[mbs-1] - ailen[mbs-1]; 607 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 608 } 609 /* reset ilen and imax for each row */ 610 for ( i=0; i<mbs; i++ ) { 611 ailen[i] = imax[i] = ai[i+1] - ai[i]; 612 } 613 a->nz = ai[mbs]; 614 615 /* diagonals may have moved, so kill the diagonal pointers */ 616 if (fshift && a->diag) { 617 PetscFree(a->diag); 618 PLogObjectMemory(A,-(m+1)*sizeof(int)); 619 a->diag = 0; 620 } 621 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Unneed storage space (blocks) %d used %d, rows %d, block size %d\n", fshift*bs2,a->nz*bs2,m,a->bs); 622 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Number of mallocs during MatSetValues %d\n", 623 a->reallocs); 624 return 0; 625 } 626 627 static int MatZeroEntries_SeqBAIJ(Mat A) 628 { 629 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 630 PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar)); 631 return 0; 632 } 633 634 int MatDestroy_SeqBAIJ(PetscObject obj) 635 { 636 Mat A = (Mat) obj; 637 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 638 639 #if defined(PETSC_LOG) 640 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 641 #endif 642 PetscFree(a->a); 643 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 644 if (a->diag) PetscFree(a->diag); 645 if (a->ilen) PetscFree(a->ilen); 646 if (a->imax) PetscFree(a->imax); 647 if (a->solve_work) PetscFree(a->solve_work); 648 if (a->mult_work) PetscFree(a->mult_work); 649 PetscFree(a); 650 PLogObjectDestroy(A); 651 PetscHeaderDestroy(A); 652 return 0; 653 } 654 655 static int MatSetOption_SeqBAIJ(Mat A,MatOption op) 656 { 657 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 658 if (op == MAT_ROW_ORIENTED) a->roworiented = 1; 659 else if (op == MAT_COLUMN_ORIENTED) a->roworiented = 0; 660 else if (op == MAT_COLUMNS_SORTED) a->sorted = 1; 661 else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 662 else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 663 else if (op == MAT_ROWS_SORTED || 664 op == MAT_SYMMETRIC || 665 op == MAT_STRUCTURALLY_SYMMETRIC || 666 op == MAT_YES_NEW_DIAGONALS) 667 PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 668 else if (op == MAT_NO_NEW_DIAGONALS) 669 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:MAT_NO_NEW_DIAGONALS");} 670 else 671 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");} 672 return 0; 673 } 674 675 676 /* -------------------------------------------------------*/ 677 /* Should check that shapes of vectors and matrices match */ 678 /* -------------------------------------------------------*/ 679 #include "pinclude/plapack.h" 680 681 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz) 682 { 683 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 684 register Scalar *x,*z,*v,sum; 685 int mbs=a->mbs,i,*idx,*ii,n; 686 687 VecGetArray_Fast(xx,x); 688 VecGetArray_Fast(zz,z); 689 690 idx = a->j; 691 v = a->a; 692 ii = a->i; 693 694 for ( i=0; i<mbs; i++ ) { 695 n = ii[1] - ii[0]; ii++; 696 sum = 0.0; 697 while (n--) sum += *v++ * x[*idx++]; 698 z[i] = sum; 699 } 700 VecRestoreArray_Fast(xx,x); 701 VecRestoreArray_Fast(zz,z); 702 PLogFlops(2*a->nz - a->m); 703 return 0; 704 } 705 706 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz) 707 { 708 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 709 register Scalar *x,*z,*v,*xb,sum1,sum2; 710 register Scalar x1,x2; 711 int mbs=a->mbs,i,*idx,*ii; 712 int j,n; 713 714 VecGetArray_Fast(xx,x); 715 VecGetArray_Fast(zz,z); 716 717 idx = a->j; 718 v = a->a; 719 ii = a->i; 720 721 for ( i=0; i<mbs; i++ ) { 722 n = ii[1] - ii[0]; ii++; 723 sum1 = 0.0; sum2 = 0.0; 724 for ( j=0; j<n; j++ ) { 725 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 726 sum1 += v[0]*x1 + v[2]*x2; 727 sum2 += v[1]*x1 + v[3]*x2; 728 v += 4; 729 } 730 z[0] = sum1; z[1] = sum2; 731 z += 2; 732 } 733 VecRestoreArray_Fast(xx,x); 734 VecRestoreArray_Fast(zz,z); 735 PLogFlops(4*a->nz - a->m); 736 return 0; 737 } 738 739 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz) 740 { 741 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 742 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 743 int mbs=a->mbs,i,*idx,*ii,j,n; 744 745 VecGetArray_Fast(xx,x); 746 VecGetArray_Fast(zz,z); 747 748 idx = a->j; 749 v = a->a; 750 ii = a->i; 751 752 for ( i=0; i<mbs; i++ ) { 753 n = ii[1] - ii[0]; ii++; 754 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 755 for ( j=0; j<n; j++ ) { 756 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 757 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 758 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 759 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 760 v += 9; 761 } 762 z[0] = sum1; z[1] = sum2; z[2] = sum3; 763 z += 3; 764 } 765 VecRestoreArray_Fast(xx,x); 766 VecRestoreArray_Fast(zz,z); 767 PLogFlops(18*a->nz - a->m); 768 return 0; 769 } 770 771 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz) 772 { 773 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 774 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4; 775 register Scalar x1,x2,x3,x4; 776 int mbs=a->mbs,i,*idx,*ii; 777 int j,n; 778 779 VecGetArray_Fast(xx,x); 780 VecGetArray_Fast(zz,z); 781 782 idx = a->j; 783 v = a->a; 784 ii = a->i; 785 786 for ( i=0; i<mbs; i++ ) { 787 n = ii[1] - ii[0]; ii++; 788 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 789 for ( j=0; j<n; j++ ) { 790 xb = x + 4*(*idx++); 791 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 792 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 793 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 794 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 795 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 796 v += 16; 797 } 798 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 799 z += 4; 800 } 801 VecRestoreArray_Fast(xx,x); 802 VecRestoreArray_Fast(zz,z); 803 PLogFlops(32*a->nz - a->m); 804 return 0; 805 } 806 807 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz) 808 { 809 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 810 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 811 register Scalar x1,x2,x3,x4,x5; 812 int mbs=a->mbs,i,*idx,*ii,j,n; 813 814 VecGetArray_Fast(xx,x); 815 VecGetArray_Fast(zz,z); 816 817 idx = a->j; 818 v = a->a; 819 ii = a->i; 820 821 for ( i=0; i<mbs; i++ ) { 822 n = ii[1] - ii[0]; ii++; 823 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 824 for ( j=0; j<n; j++ ) { 825 xb = x + 5*(*idx++); 826 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 827 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 828 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 829 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 830 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 831 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 832 v += 25; 833 } 834 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 835 z += 5; 836 } 837 VecRestoreArray_Fast(xx,x); 838 VecRestoreArray_Fast(zz,z); 839 PLogFlops(50*a->nz - a->m); 840 return 0; 841 } 842 843 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz) 844 { 845 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 846 register Scalar *x,*z,*v,*xb; 847 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2; 848 int _One = 1,ncols,k; 849 Scalar _DOne = 1.0,*work,*workt,_DZero = 0.0; 850 851 VecGetArray_Fast(xx,x); 852 VecGetArray_Fast(zz,z); 853 854 idx = a->j; 855 v = a->a; 856 ii = a->i; 857 858 859 if (!a->mult_work) { 860 k = PetscMax(a->m,a->n); 861 a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work); 862 } 863 work = a->mult_work; 864 for ( i=0; i<mbs; i++ ) { 865 n = ii[1] - ii[0]; ii++; 866 ncols = n*bs; 867 workt = work; 868 for ( j=0; j<n; j++ ) { 869 xb = x + bs*(*idx++); 870 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 871 workt += bs; 872 } 873 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); 874 v += n*bs2; 875 z += bs; 876 } 877 VecRestoreArray_Fast(xx,x); 878 VecRestoreArray_Fast(zz,z); 879 PLogFlops(2*a->nz*bs2 - a->m); 880 return 0; 881 } 882 883 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 884 { 885 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 886 register Scalar *x,*y,*z,*v,sum; 887 int mbs=a->mbs,i,*idx,*ii,n; 888 889 VecGetArray_Fast(xx,x); 890 VecGetArray_Fast(yy,y); 891 VecGetArray_Fast(zz,z); 892 893 idx = a->j; 894 v = a->a; 895 ii = a->i; 896 897 for ( i=0; i<mbs; i++ ) { 898 n = ii[1] - ii[0]; ii++; 899 sum = y[i]; 900 while (n--) sum += *v++ * x[*idx++]; 901 z[i] = sum; 902 } 903 VecRestoreArray_Fast(xx,x); 904 VecRestoreArray_Fast(yy,y); 905 VecRestoreArray_Fast(zz,z); 906 PLogFlops(2*a->nz); 907 return 0; 908 } 909 910 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 911 { 912 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 913 register Scalar *x,*y,*z,*v,*xb,sum1,sum2; 914 register Scalar x1,x2; 915 int mbs=a->mbs,i,*idx,*ii; 916 int j,n; 917 918 VecGetArray_Fast(xx,x); 919 VecGetArray_Fast(yy,y); 920 VecGetArray_Fast(zz,z); 921 922 idx = a->j; 923 v = a->a; 924 ii = a->i; 925 926 for ( i=0; i<mbs; i++ ) { 927 n = ii[1] - ii[0]; ii++; 928 sum1 = y[0]; sum2 = y[1]; 929 for ( j=0; j<n; j++ ) { 930 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 931 sum1 += v[0]*x1 + v[2]*x2; 932 sum2 += v[1]*x1 + v[3]*x2; 933 v += 4; 934 } 935 z[0] = sum1; z[1] = sum2; 936 z += 2; y += 2; 937 } 938 VecRestoreArray_Fast(xx,x); 939 VecRestoreArray_Fast(yy,y); 940 VecRestoreArray_Fast(zz,z); 941 PLogFlops(4*a->nz); 942 return 0; 943 } 944 945 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 946 { 947 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 948 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 949 int mbs=a->mbs,i,*idx,*ii,j,n; 950 951 VecGetArray_Fast(xx,x); 952 VecGetArray_Fast(yy,y); 953 VecGetArray_Fast(zz,z); 954 955 idx = a->j; 956 v = a->a; 957 ii = a->i; 958 959 for ( i=0; i<mbs; i++ ) { 960 n = ii[1] - ii[0]; ii++; 961 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; 962 for ( j=0; j<n; j++ ) { 963 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 964 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 965 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 966 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 967 v += 9; 968 } 969 z[0] = sum1; z[1] = sum2; z[2] = sum3; 970 z += 3; y += 3; 971 } 972 VecRestoreArray_Fast(xx,x); 973 VecRestoreArray_Fast(yy,y); 974 VecRestoreArray_Fast(zz,z); 975 PLogFlops(18*a->nz); 976 return 0; 977 } 978 979 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 980 { 981 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 982 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4; 983 register Scalar x1,x2,x3,x4; 984 int mbs=a->mbs,i,*idx,*ii; 985 int j,n; 986 987 VecGetArray_Fast(xx,x); 988 VecGetArray_Fast(yy,y); 989 VecGetArray_Fast(zz,z); 990 991 idx = a->j; 992 v = a->a; 993 ii = a->i; 994 995 for ( i=0; i<mbs; i++ ) { 996 n = ii[1] - ii[0]; ii++; 997 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; 998 for ( j=0; j<n; j++ ) { 999 xb = x + 4*(*idx++); 1000 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1001 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 1002 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 1003 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 1004 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 1005 v += 16; 1006 } 1007 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 1008 z += 4; y += 4; 1009 } 1010 VecRestoreArray_Fast(xx,x); 1011 VecRestoreArray_Fast(yy,y); 1012 VecRestoreArray_Fast(zz,z); 1013 PLogFlops(32*a->nz); 1014 return 0; 1015 } 1016 1017 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 1018 { 1019 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1020 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 1021 register Scalar x1,x2,x3,x4,x5; 1022 int mbs=a->mbs,i,*idx,*ii,j,n; 1023 1024 VecGetArray_Fast(xx,x); 1025 VecGetArray_Fast(yy,y); 1026 VecGetArray_Fast(zz,z); 1027 1028 idx = a->j; 1029 v = a->a; 1030 ii = a->i; 1031 1032 for ( i=0; i<mbs; i++ ) { 1033 n = ii[1] - ii[0]; ii++; 1034 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; 1035 for ( j=0; j<n; j++ ) { 1036 xb = x + 5*(*idx++); 1037 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 1038 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1039 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1040 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1041 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1042 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 1043 v += 25; 1044 } 1045 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 1046 z += 5; y += 5; 1047 } 1048 VecRestoreArray_Fast(xx,x); 1049 VecRestoreArray_Fast(yy,y); 1050 VecRestoreArray_Fast(zz,z); 1051 PLogFlops(50*a->nz); 1052 return 0; 1053 } 1054 1055 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 1056 { 1057 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1058 register Scalar *x,*z,*v,*xb; 1059 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr; 1060 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1061 1062 if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1063 1064 VecGetArray_Fast(xx,x); 1065 VecGetArray_Fast(zz,z); 1066 1067 idx = a->j; 1068 v = a->a; 1069 ii = a->i; 1070 1071 1072 if (!a->mult_work) { 1073 k = PetscMax(a->m,a->n); 1074 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work); 1075 } 1076 work = a->mult_work; 1077 for ( i=0; i<mbs; i++ ) { 1078 n = ii[1] - ii[0]; ii++; 1079 ncols = n*bs; 1080 workt = work; 1081 for ( j=0; j<n; j++ ) { 1082 xb = x + bs*(*idx++); 1083 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1084 workt += bs; 1085 } 1086 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); 1087 v += n*bs2; 1088 z += bs; 1089 } 1090 VecRestoreArray_Fast(xx,x); 1091 VecRestoreArray_Fast(zz,z); 1092 PLogFlops(2*a->nz*bs2 ); 1093 return 0; 1094 } 1095 1096 static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz) 1097 { 1098 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1099 Scalar *xg,*zg,*zb; 1100 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1101 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1102 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1103 1104 1105 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 1106 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 1107 PetscMemzero(z,N*sizeof(Scalar)); 1108 1109 idx = a->j; 1110 v = a->a; 1111 ii = a->i; 1112 1113 switch (bs) { 1114 case 1: 1115 for ( i=0; i<mbs; i++ ) { 1116 n = ii[1] - ii[0]; ii++; 1117 xb = x + i; x1 = xb[0]; 1118 ib = idx + ai[i]; 1119 for ( j=0; j<n; j++ ) { 1120 rval = ib[j]; 1121 z[rval] += *v++ * x1; 1122 } 1123 } 1124 break; 1125 case 2: 1126 for ( i=0; i<mbs; i++ ) { 1127 n = ii[1] - ii[0]; ii++; 1128 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1129 ib = idx + ai[i]; 1130 for ( j=0; j<n; j++ ) { 1131 rval = ib[j]*2; 1132 z[rval++] += v[0]*x1 + v[1]*x2; 1133 z[rval++] += v[2]*x1 + v[3]*x2; 1134 v += 4; 1135 } 1136 } 1137 break; 1138 case 3: 1139 for ( i=0; i<mbs; i++ ) { 1140 n = ii[1] - ii[0]; ii++; 1141 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1142 ib = idx + ai[i]; 1143 for ( j=0; j<n; j++ ) { 1144 rval = ib[j]*3; 1145 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1146 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1147 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1148 v += 9; 1149 } 1150 } 1151 break; 1152 case 4: 1153 for ( i=0; i<mbs; i++ ) { 1154 n = ii[1] - ii[0]; ii++; 1155 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1156 ib = idx + ai[i]; 1157 for ( j=0; j<n; j++ ) { 1158 rval = ib[j]*4; 1159 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1160 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1161 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1162 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1163 v += 16; 1164 } 1165 } 1166 break; 1167 case 5: 1168 for ( i=0; i<mbs; i++ ) { 1169 n = ii[1] - ii[0]; ii++; 1170 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1171 x4 = xb[3]; x5 = xb[4]; 1172 ib = idx + ai[i]; 1173 for ( j=0; j<n; j++ ) { 1174 rval = ib[j]*5; 1175 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1176 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1177 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1178 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1179 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1180 v += 25; 1181 } 1182 } 1183 break; 1184 /* block sizes larger then 5 by 5 are handled by BLAS */ 1185 default: { 1186 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1187 if (!a->mult_work) { 1188 k = PetscMax(a->m,a->n); 1189 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1190 CHKPTRQ(a->mult_work); 1191 } 1192 work = a->mult_work; 1193 for ( i=0; i<mbs; i++ ) { 1194 n = ii[1] - ii[0]; ii++; 1195 ncols = n*bs; 1196 PetscMemzero(work,ncols*sizeof(Scalar)); 1197 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1198 v += n*bs2; 1199 x += bs; 1200 workt = work; 1201 for ( j=0; j<n; j++ ) { 1202 zb = z + bs*(*idx++); 1203 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1204 workt += bs; 1205 } 1206 } 1207 } 1208 } 1209 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1210 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1211 PLogFlops(2*a->nz*a->bs2 - a->n); 1212 return 0; 1213 } 1214 static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1215 { 1216 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1217 Scalar *xg,*zg,*zb; 1218 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1219 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1220 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1221 1222 1223 1224 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 1225 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 1226 1227 if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1228 else PetscMemzero(z,N*sizeof(Scalar)); 1229 1230 1231 idx = a->j; 1232 v = a->a; 1233 ii = a->i; 1234 1235 switch (bs) { 1236 case 1: 1237 for ( i=0; i<mbs; i++ ) { 1238 n = ii[1] - ii[0]; ii++; 1239 xb = x + i; x1 = xb[0]; 1240 ib = idx + ai[i]; 1241 for ( j=0; j<n; j++ ) { 1242 rval = ib[j]; 1243 z[rval] += *v++ * x1; 1244 } 1245 } 1246 break; 1247 case 2: 1248 for ( i=0; i<mbs; i++ ) { 1249 n = ii[1] - ii[0]; ii++; 1250 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1251 ib = idx + ai[i]; 1252 for ( j=0; j<n; j++ ) { 1253 rval = ib[j]*2; 1254 z[rval++] += v[0]*x1 + v[1]*x2; 1255 z[rval++] += v[2]*x1 + v[3]*x2; 1256 v += 4; 1257 } 1258 } 1259 break; 1260 case 3: 1261 for ( i=0; i<mbs; i++ ) { 1262 n = ii[1] - ii[0]; ii++; 1263 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1264 ib = idx + ai[i]; 1265 for ( j=0; j<n; j++ ) { 1266 rval = ib[j]*3; 1267 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1268 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1269 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1270 v += 9; 1271 } 1272 } 1273 break; 1274 case 4: 1275 for ( i=0; i<mbs; i++ ) { 1276 n = ii[1] - ii[0]; ii++; 1277 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1278 ib = idx + ai[i]; 1279 for ( j=0; j<n; j++ ) { 1280 rval = ib[j]*4; 1281 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1282 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1283 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1284 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1285 v += 16; 1286 } 1287 } 1288 break; 1289 case 5: 1290 for ( i=0; i<mbs; i++ ) { 1291 n = ii[1] - ii[0]; ii++; 1292 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1293 x4 = xb[3]; x5 = xb[4]; 1294 ib = idx + ai[i]; 1295 for ( j=0; j<n; j++ ) { 1296 rval = ib[j]*5; 1297 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1298 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1299 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1300 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1301 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1302 v += 25; 1303 } 1304 } 1305 break; 1306 /* block sizes larger then 5 by 5 are handled by BLAS */ 1307 default: { 1308 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1309 if (!a->mult_work) { 1310 k = PetscMax(a->m,a->n); 1311 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1312 CHKPTRQ(a->mult_work); 1313 } 1314 work = a->mult_work; 1315 for ( i=0; i<mbs; i++ ) { 1316 n = ii[1] - ii[0]; ii++; 1317 ncols = n*bs; 1318 PetscMemzero(work,ncols*sizeof(Scalar)); 1319 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1320 v += n*bs2; 1321 x += bs; 1322 workt = work; 1323 for ( j=0; j<n; j++ ) { 1324 zb = z + bs*(*idx++); 1325 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1326 workt += bs; 1327 } 1328 } 1329 } 1330 } 1331 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1332 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1333 PLogFlops(2*a->nz*a->bs2); 1334 return 0; 1335 } 1336 1337 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem) 1338 { 1339 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1340 if (nz) *nz = a->bs2*a->nz; 1341 if (nza) *nza = a->maxnz; 1342 if (mem) *mem = (int)A->mem; 1343 return 0; 1344 } 1345 1346 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 1347 { 1348 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 1349 1350 if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type"); 1351 1352 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1353 if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 1354 (a->nz != b->nz)) { 1355 *flg = PETSC_FALSE; return 0; 1356 } 1357 1358 /* if the a->i are the same */ 1359 if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 1360 *flg = PETSC_FALSE; return 0; 1361 } 1362 1363 /* if a->j are the same */ 1364 if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 1365 *flg = PETSC_FALSE; return 0; 1366 } 1367 1368 /* if a->a are the same */ 1369 if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 1370 *flg = PETSC_FALSE; return 0; 1371 } 1372 *flg = PETSC_TRUE; 1373 return 0; 1374 1375 } 1376 1377 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 1378 { 1379 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1380 int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 1381 Scalar *x, zero = 0.0,*aa,*aa_j; 1382 1383 bs = a->bs; 1384 aa = a->a; 1385 ai = a->i; 1386 aj = a->j; 1387 ambs = a->mbs; 1388 bs2 = a->bs2; 1389 1390 VecSet(&zero,v); 1391 VecGetArray(v,&x); VecGetLocalSize(v,&n); 1392 if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector"); 1393 for ( i=0; i<ambs; i++ ) { 1394 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1395 if (aj[j] == i) { 1396 row = i*bs; 1397 aa_j = aa+j*bs2; 1398 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 1399 break; 1400 } 1401 } 1402 } 1403 return 0; 1404 } 1405 1406 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 1407 { 1408 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1409 Scalar *l,*r,x,*v,*aa,*li,*ri; 1410 int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 1411 1412 ai = a->i; 1413 aj = a->j; 1414 aa = a->a; 1415 m = a->m; 1416 n = a->n; 1417 bs = a->bs; 1418 mbs = a->mbs; 1419 bs2 = a->bs2; 1420 if (ll) { 1421 VecGetArray(ll,&l); VecGetSize(ll,&lm); 1422 if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length"); 1423 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1424 M = ai[i+1] - ai[i]; 1425 li = l + i*bs; 1426 v = aa + bs2*ai[i]; 1427 for ( j=0; j<M; j++ ) { /* for each block */ 1428 for ( k=0; k<bs2; k++ ) { 1429 (*v++) *= li[k%bs]; 1430 } 1431 } 1432 } 1433 } 1434 1435 if (rr) { 1436 VecGetArray(rr,&r); VecGetSize(rr,&rn); 1437 if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length"); 1438 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1439 M = ai[i+1] - ai[i]; 1440 v = aa + bs2*ai[i]; 1441 for ( j=0; j<M; j++ ) { /* for each block */ 1442 ri = r + bs*aj[ai[i]+j]; 1443 for ( k=0; k<bs; k++ ) { 1444 x = ri[k]; 1445 for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 1446 } 1447 } 1448 } 1449 } 1450 return 0; 1451 } 1452 1453 1454 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 1455 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 1456 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1457 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*); 1458 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**); 1459 1460 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 1461 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 1462 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 1463 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 1464 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 1465 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 1466 1467 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 1468 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 1469 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 1470 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 1471 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 1472 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 1473 1474 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 1475 { 1476 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1477 Scalar *v = a->a; 1478 double sum = 0.0; 1479 int i,nz=a->nz,bs2=a->bs2; 1480 1481 if (type == NORM_FROBENIUS) { 1482 for (i=0; i< bs2*nz; i++ ) { 1483 #if defined(PETSC_COMPLEX) 1484 sum += real(conj(*v)*(*v)); v++; 1485 #else 1486 sum += (*v)*(*v); v++; 1487 #endif 1488 } 1489 *norm = sqrt(sum); 1490 } 1491 else { 1492 SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet"); 1493 } 1494 return 0; 1495 } 1496 1497 /* 1498 note: This can only work for identity for row and col. It would 1499 be good to check this and otherwise generate an error. 1500 */ 1501 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 1502 { 1503 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1504 Mat outA; 1505 int ierr; 1506 1507 if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported"); 1508 1509 outA = inA; 1510 inA->factor = FACTOR_LU; 1511 a->row = row; 1512 a->col = col; 1513 1514 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1515 1516 if (!a->diag) { 1517 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 1518 } 1519 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 1520 return 0; 1521 } 1522 1523 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 1524 { 1525 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1526 int one = 1, totalnz = a->bs2*a->nz; 1527 BLscal_( &totalnz, alpha, a->a, &one ); 1528 PLogFlops(totalnz); 1529 return 0; 1530 } 1531 1532 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 1533 { 1534 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1535 int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 1536 int *ai = a->i, *ailen = a->ilen; 1537 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 1538 Scalar *ap, *aa = a->a, zero = 0.0; 1539 1540 for ( k=0; k<m; k++ ) { /* loop over rows */ 1541 row = im[k]; brow = row/bs; 1542 if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row"); 1543 if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large"); 1544 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1545 nrow = ailen[brow]; 1546 for ( l=0; l<n; l++ ) { /* loop over columns */ 1547 if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column"); 1548 if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large"); 1549 col = in[l] ; 1550 bcol = col/bs; 1551 cidx = col%bs; 1552 ridx = row%bs; 1553 high = nrow; 1554 low = 0; /* assume unsorted */ 1555 while (high-low > 5) { 1556 t = (low+high)/2; 1557 if (rp[t] > bcol) high = t; 1558 else low = t; 1559 } 1560 for ( i=low; i<high; i++ ) { 1561 if (rp[i] > bcol) break; 1562 if (rp[i] == bcol) { 1563 *v++ = ap[bs2*i+bs*cidx+ridx]; 1564 goto finished; 1565 } 1566 } 1567 *v++ = zero; 1568 finished:; 1569 } 1570 } 1571 return 0; 1572 } 1573 1574 static int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs) 1575 { 1576 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data; 1577 *bs = baij->bs; 1578 return 0; 1579 } 1580 1581 /* idx should be of length atlease bs */ 1582 static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg) 1583 { 1584 int i,row; 1585 row = idx[0]; 1586 if (row%bs!=0) { *flg = PETSC_FALSE; return 0; } 1587 1588 for ( i=1; i<bs; i++ ) { 1589 if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; } 1590 } 1591 *flg = PETSC_TRUE; 1592 return 0; 1593 } 1594 1595 static int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag) 1596 { 1597 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1598 IS is_local; 1599 int ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2; 1600 PetscTruth flg; 1601 Scalar zero = 0.0,*aa; 1602 1603 /* Make a copy of the IS and sort it */ 1604 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 1605 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1606 ierr = ISCreateSeq(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr); 1607 ierr = ISSort(is_local); CHKERRQ(ierr); 1608 ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr); 1609 1610 i = 0; 1611 while (i < is_n) { 1612 if (rows[i]<0 || rows[i]>m) SETERRQ(1,"MatZeroRows_SeqBAIJ:row out of range"); 1613 flg = PETSC_FALSE; 1614 if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); } 1615 if (flg) { /* There exists a block of rows to be Zerowed */ 1616 baij->ilen[rows[i]/bs] = 0; 1617 i += bs; 1618 } else { /* Zero out only the requested row */ 1619 count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs; 1620 aa = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs); 1621 for ( j=0; j<count; j++ ) { 1622 aa[0] = zero; 1623 aa+=bs; 1624 } 1625 i++; 1626 } 1627 } 1628 if (diag) { 1629 for ( j=0; j<is_n; j++ ) { 1630 ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1631 } 1632 } 1633 ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr); 1634 ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr); 1635 ierr = ISDestroy(is_local); CHKERRQ(ierr); 1636 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1637 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1638 1639 return 0; 1640 } 1641 int MatPrintHelp_SeqBAIJ(Mat A) 1642 { 1643 static int called = 0; 1644 MPI_Comm comm = A->comm; 1645 1646 if (called) return 0; else called = 1; 1647 PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n"); 1648 PetscPrintf(comm," -mat_block_size <block_size>\n"); 1649 return 0; 1650 } 1651 1652 /* -------------------------------------------------------------------*/ 1653 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 1654 MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 1655 MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N, 1656 MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ, 1657 MatSolve_SeqBAIJ_N,0, 1658 0,0, 1659 MatLUFactor_SeqBAIJ,0, 1660 0, 1661 MatTranspose_SeqBAIJ, 1662 MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 1663 MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 1664 0,MatAssemblyEnd_SeqBAIJ, 1665 0, 1666 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ, 1667 MatGetReordering_SeqBAIJ, 1668 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 1669 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 1670 MatILUFactorSymbolic_SeqBAIJ,0, 1671 0,0,/* MatConvert_SeqBAIJ */ 0, 1672 MatGetSubMatrix_SeqBAIJ,0, 1673 MatConvertSameType_SeqBAIJ,0,0, 1674 MatILUFactor_SeqBAIJ,0,0, 1675 MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ, 1676 MatGetValues_SeqBAIJ,0, 1677 MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 1678 0,0,0,MatGetBlockSize_SeqBAIJ}; 1679 1680 /*@C 1681 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1682 compressed row) format. For good matrix assembly performance the 1683 user should preallocate the matrix storage by setting the parameter nz 1684 (or the array nzz). By setting these parameters accurately, performance 1685 during matrix assembly can be increased by more than a factor of 50. 1686 1687 Input Parameters: 1688 . comm - MPI communicator, set to MPI_COMM_SELF 1689 . bs - size of block 1690 . m - number of rows 1691 . n - number of columns 1692 . nz - number of block nonzeros per block row (same for all rows) 1693 . nzz - array containing the number of block nonzeros in the various block rows 1694 (possibly different for each block row) or PETSC_NULL 1695 1696 Output Parameter: 1697 . A - the matrix 1698 1699 Notes: 1700 The block AIJ format is fully compatible with standard Fortran 77 1701 storage. That is, the stored row and column indices can begin at 1702 either one (as in Fortran) or zero. See the users' manual for details. 1703 1704 Specify the preallocated storage with either nz or nnz (not both). 1705 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1706 allocation. For additional details, see the users manual chapter on 1707 matrices and the file $(PETSC_DIR)/Performance. 1708 1709 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1710 @*/ 1711 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 1712 { 1713 Mat B; 1714 Mat_SeqBAIJ *b; 1715 int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs; 1716 1717 if (mbs*bs!=m || nbs*bs!=n) 1718 SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize"); 1719 1720 *A = 0; 1721 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 1722 PLogObjectCreate(B); 1723 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 1724 PetscMemzero(b,sizeof(Mat_SeqBAIJ)); 1725 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 1726 ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr); 1727 if (!flg) { 1728 switch (bs) { 1729 case 1: 1730 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1731 B->ops.solve = MatSolve_SeqBAIJ_1; 1732 B->ops.mult = MatMult_SeqBAIJ_1; 1733 B->ops.multadd = MatMultAdd_SeqBAIJ_1; 1734 break; 1735 case 2: 1736 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1737 B->ops.solve = MatSolve_SeqBAIJ_2; 1738 B->ops.mult = MatMult_SeqBAIJ_2; 1739 B->ops.multadd = MatMultAdd_SeqBAIJ_2; 1740 break; 1741 case 3: 1742 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1743 B->ops.solve = MatSolve_SeqBAIJ_3; 1744 B->ops.mult = MatMult_SeqBAIJ_3; 1745 B->ops.multadd = MatMultAdd_SeqBAIJ_3; 1746 break; 1747 case 4: 1748 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1749 B->ops.solve = MatSolve_SeqBAIJ_4; 1750 B->ops.mult = MatMult_SeqBAIJ_4; 1751 B->ops.multadd = MatMultAdd_SeqBAIJ_4; 1752 break; 1753 case 5: 1754 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1755 B->ops.solve = MatSolve_SeqBAIJ_5; 1756 B->ops.mult = MatMult_SeqBAIJ_5; 1757 B->ops.multadd = MatMultAdd_SeqBAIJ_5; 1758 break; 1759 } 1760 } 1761 B->destroy = MatDestroy_SeqBAIJ; 1762 B->view = MatView_SeqBAIJ; 1763 B->factor = 0; 1764 B->lupivotthreshold = 1.0; 1765 b->row = 0; 1766 b->col = 0; 1767 b->reallocs = 0; 1768 1769 b->m = m; B->m = m; B->M = m; 1770 b->n = n; B->n = n; B->N = n; 1771 b->mbs = mbs; 1772 b->nbs = nbs; 1773 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 1774 if (nnz == PETSC_NULL) { 1775 if (nz == PETSC_DEFAULT) nz = 5; 1776 else if (nz <= 0) nz = 1; 1777 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 1778 nz = nz*mbs; 1779 } 1780 else { 1781 nz = 0; 1782 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 1783 } 1784 1785 /* allocate the matrix space */ 1786 len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 1787 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 1788 PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 1789 b->j = (int *) (b->a + nz*bs2); 1790 PetscMemzero(b->j,nz*sizeof(int)); 1791 b->i = b->j + nz; 1792 b->singlemalloc = 1; 1793 1794 b->i[0] = 0; 1795 for (i=1; i<mbs+1; i++) { 1796 b->i[i] = b->i[i-1] + b->imax[i-1]; 1797 } 1798 1799 /* b->ilen will count nonzeros in each block row so far. */ 1800 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 1801 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1802 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 1803 1804 b->bs = bs; 1805 b->bs2 = bs2; 1806 b->mbs = mbs; 1807 b->nz = 0; 1808 b->maxnz = nz*bs2; 1809 b->sorted = 0; 1810 b->roworiented = 1; 1811 b->nonew = 0; 1812 b->diag = 0; 1813 b->solve_work = 0; 1814 b->mult_work = 0; 1815 b->spptr = 0; 1816 *A = B; 1817 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 1818 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 1819 return 0; 1820 } 1821 1822 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 1823 { 1824 Mat C; 1825 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 1826 int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1827 1828 if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix"); 1829 1830 *B = 0; 1831 PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 1832 PLogObjectCreate(C); 1833 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 1834 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 1835 C->destroy = MatDestroy_SeqBAIJ; 1836 C->view = MatView_SeqBAIJ; 1837 C->factor = A->factor; 1838 c->row = 0; 1839 c->col = 0; 1840 C->assembled = PETSC_TRUE; 1841 1842 c->m = C->m = a->m; 1843 c->n = C->n = a->n; 1844 C->M = a->m; 1845 C->N = a->n; 1846 1847 c->bs = a->bs; 1848 c->bs2 = a->bs2; 1849 c->mbs = a->mbs; 1850 c->nbs = a->nbs; 1851 1852 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 1853 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 1854 for ( i=0; i<mbs; i++ ) { 1855 c->imax[i] = a->imax[i]; 1856 c->ilen[i] = a->ilen[i]; 1857 } 1858 1859 /* allocate the matrix space */ 1860 c->singlemalloc = 1; 1861 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 1862 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 1863 c->j = (int *) (c->a + nz*bs2); 1864 c->i = c->j + nz; 1865 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 1866 if (mbs > 0) { 1867 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 1868 if (cpvalues == COPY_VALUES) { 1869 PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 1870 } 1871 } 1872 1873 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1874 c->sorted = a->sorted; 1875 c->roworiented = a->roworiented; 1876 c->nonew = a->nonew; 1877 1878 if (a->diag) { 1879 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 1880 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 1881 for ( i=0; i<mbs; i++ ) { 1882 c->diag[i] = a->diag[i]; 1883 } 1884 } 1885 else c->diag = 0; 1886 c->nz = a->nz; 1887 c->maxnz = a->maxnz; 1888 c->solve_work = 0; 1889 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1890 c->mult_work = 0; 1891 *B = C; 1892 return 0; 1893 } 1894 1895 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 1896 { 1897 Mat_SeqBAIJ *a; 1898 Mat B; 1899 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 1900 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1901 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1902 int *masked, nmask,tmp,bs2,ishift; 1903 Scalar *aa; 1904 MPI_Comm comm = ((PetscObject) viewer)->comm; 1905 1906 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 1907 bs2 = bs*bs; 1908 1909 MPI_Comm_size(comm,&size); 1910 if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor"); 1911 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1912 ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 1913 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object"); 1914 M = header[1]; N = header[2]; nz = header[3]; 1915 1916 if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 1917 1918 /* 1919 This code adds extra rows to make sure the number of rows is 1920 divisible by the blocksize 1921 */ 1922 mbs = M/bs; 1923 extra_rows = bs - M + bs*(mbs); 1924 if (extra_rows == bs) extra_rows = 0; 1925 else mbs++; 1926 if (extra_rows) { 1927 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize"); 1928 } 1929 1930 /* read in row lengths */ 1931 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 1932 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 1933 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 1934 1935 /* read in column indices */ 1936 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 1937 ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 1938 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 1939 1940 /* loop over row lengths determining block row lengths */ 1941 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 1942 PetscMemzero(browlengths,mbs*sizeof(int)); 1943 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 1944 PetscMemzero(mask,mbs*sizeof(int)); 1945 masked = mask + mbs; 1946 rowcount = 0; nzcount = 0; 1947 for ( i=0; i<mbs; i++ ) { 1948 nmask = 0; 1949 for ( j=0; j<bs; j++ ) { 1950 kmax = rowlengths[rowcount]; 1951 for ( k=0; k<kmax; k++ ) { 1952 tmp = jj[nzcount++]/bs; 1953 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1954 } 1955 rowcount++; 1956 } 1957 browlengths[i] += nmask; 1958 /* zero out the mask elements we set */ 1959 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1960 } 1961 1962 /* create our matrix */ 1963 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 1964 CHKERRQ(ierr); 1965 B = *A; 1966 a = (Mat_SeqBAIJ *) B->data; 1967 1968 /* set matrix "i" values */ 1969 a->i[0] = 0; 1970 for ( i=1; i<= mbs; i++ ) { 1971 a->i[i] = a->i[i-1] + browlengths[i-1]; 1972 a->ilen[i-1] = browlengths[i-1]; 1973 } 1974 a->nz = 0; 1975 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 1976 1977 /* read in nonzero values */ 1978 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 1979 ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 1980 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 1981 1982 /* set "a" and "j" values into matrix */ 1983 nzcount = 0; jcount = 0; 1984 for ( i=0; i<mbs; i++ ) { 1985 nzcountb = nzcount; 1986 nmask = 0; 1987 for ( j=0; j<bs; j++ ) { 1988 kmax = rowlengths[i*bs+j]; 1989 for ( k=0; k<kmax; k++ ) { 1990 tmp = jj[nzcount++]/bs; 1991 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1992 } 1993 rowcount++; 1994 } 1995 /* sort the masked values */ 1996 PetscSortInt(nmask,masked); 1997 1998 /* set "j" values into matrix */ 1999 maskcount = 1; 2000 for ( j=0; j<nmask; j++ ) { 2001 a->j[jcount++] = masked[j]; 2002 mask[masked[j]] = maskcount++; 2003 } 2004 /* set "a" values into matrix */ 2005 ishift = bs2*a->i[i]; 2006 for ( j=0; j<bs; j++ ) { 2007 kmax = rowlengths[i*bs+j]; 2008 for ( k=0; k<kmax; k++ ) { 2009 tmp = jj[nzcountb]/bs ; 2010 block = mask[tmp] - 1; 2011 point = jj[nzcountb] - bs*tmp; 2012 idx = ishift + bs2*block + j + bs*point; 2013 a->a[idx] = aa[nzcountb++]; 2014 } 2015 } 2016 /* zero out the mask elements we set */ 2017 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2018 } 2019 if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix"); 2020 2021 PetscFree(rowlengths); 2022 PetscFree(browlengths); 2023 PetscFree(aa); 2024 PetscFree(jj); 2025 PetscFree(mask); 2026 2027 B->assembled = PETSC_TRUE; 2028 2029 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 2030 if (flg) { 2031 Viewer tviewer; 2032 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2033 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr); 2034 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2035 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2036 } 2037 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 2038 if (flg) { 2039 Viewer tviewer; 2040 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2041 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr); 2042 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2043 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2044 } 2045 ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 2046 if (flg) { 2047 Viewer tviewer; 2048 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2049 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2050 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2051 } 2052 ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 2053 if (flg) { 2054 Viewer tviewer; 2055 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2056 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr); 2057 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2058 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2059 } 2060 ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 2061 if (flg) { 2062 Viewer tviewer; 2063 ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 2064 ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 2065 ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 2066 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2067 } 2068 return 0; 2069 } 2070 2071 2072 2073