1 #ifndef lint 2 static char vcid[] = "$Id: baij.c,v 1.64 1996/08/06 04:02:46 bsmith Exp bsmith $"; 3 #endif 4 5 /* 6 Defines the basic matrix operations for the BAIJ (compressed row) 7 matrix storage format. 8 */ 9 #include "src/mat/impls/baij/seq/baij.h" 10 #include "src/vec/vecimpl.h" 11 #include "src/inline/spops.h" 12 #include "petsc.h" 13 14 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 15 16 static int MatGetReordering_SeqBAIJ(Mat A,MatReordering type,IS *rperm,IS *cperm) 17 { 18 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 19 int ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift; 20 21 /* 22 this is tacky: In the future when we have written special factorization 23 and solve routines for the identity permutation we should use a 24 stride index set instead of the general one. 25 */ 26 if (type == ORDER_NATURAL) { 27 idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx); 28 for ( i=0; i<n; i++ ) idx[i] = i; 29 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr); 30 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr); 31 PetscFree(idx); 32 ISSetPermutation(*rperm); 33 ISSetPermutation(*cperm); 34 ISSetIdentity(*rperm); 35 ISSetIdentity(*cperm); 36 return 0; 37 } 38 39 MatReorderingRegisterAll(); 40 ishift = 0; 41 oshift = -MatReorderingIndexShift[(int)type]; 42 if (MatReorderingRequiresSymmetric[(int)type]) { 43 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,ishift,oshift,&ia,&ja);CHKERRQ(ierr); 44 ierr = MatGetReordering_IJ(n,ia,ja,type,rperm,cperm); CHKERRQ(ierr); 45 PetscFree(ia); PetscFree(ja); 46 } else { 47 if (ishift == oshift) { 48 ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 49 } 50 else if (ishift == -1) { 51 /* temporarily subtract 1 from i and j indices */ 52 int nz = a->i[n] - 1; 53 for ( i=0; i<nz; i++ ) a->j[i]--; 54 for ( i=0; i<n+1; i++ ) a->i[i]--; 55 ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 56 for ( i=0; i<nz; i++ ) a->j[i]++; 57 for ( i=0; i<n+1; i++ ) a->i[i]++; 58 } else { 59 /* temporarily add 1 to i and j indices */ 60 int nz = a->i[n] - 1; 61 for ( i=0; i<nz; i++ ) a->j[i]++; 62 for ( i=0; i<n+1; i++ ) a->i[i]++; 63 ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 64 for ( i=0; i<nz; i++ ) a->j[i]--; 65 for ( i=0; i<n+1; i++ ) a->i[i]--; 66 } 67 } 68 return 0; 69 } 70 71 /* 72 Adds diagonal pointers to sparse matrix structure. 73 */ 74 75 int MatMarkDiag_SeqBAIJ(Mat A) 76 { 77 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 78 int i,j, *diag, m = a->mbs; 79 80 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 81 PLogObjectMemory(A,(m+1)*sizeof(int)); 82 for ( i=0; i<m; i++ ) { 83 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 84 if (a->j[j] == i) { 85 diag[i] = j; 86 break; 87 } 88 } 89 } 90 a->diag = diag; 91 return 0; 92 } 93 94 #include "draw.h" 95 #include "pinclude/pviewer.h" 96 #include "sys.h" 97 98 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 99 { 100 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 101 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 102 Scalar *aa; 103 104 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 105 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 106 col_lens[0] = MAT_COOKIE; 107 col_lens[1] = a->m; 108 col_lens[2] = a->n; 109 col_lens[3] = a->nz*bs2; 110 111 /* store lengths of each row and write (including header) to file */ 112 count = 0; 113 for ( i=0; i<a->mbs; i++ ) { 114 for ( j=0; j<bs; j++ ) { 115 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 116 } 117 } 118 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 119 PetscFree(col_lens); 120 121 /* store column indices (zero start index) */ 122 jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj); 123 count = 0; 124 for ( i=0; i<a->mbs; i++ ) { 125 for ( j=0; j<bs; j++ ) { 126 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 127 for ( l=0; l<bs; l++ ) { 128 jj[count++] = bs*a->j[k] + l; 129 } 130 } 131 } 132 } 133 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr); 134 PetscFree(jj); 135 136 /* store nonzero values */ 137 aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa); 138 count = 0; 139 for ( i=0; i<a->mbs; i++ ) { 140 for ( j=0; j<bs; j++ ) { 141 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 142 for ( l=0; l<bs; l++ ) { 143 aa[count++] = a->a[bs2*k + l*bs + j]; 144 } 145 } 146 } 147 } 148 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 149 PetscFree(aa); 150 return 0; 151 } 152 153 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 154 { 155 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 156 int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 157 FILE *fd; 158 char *outputname; 159 160 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 161 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 162 ierr = ViewerGetFormat(viewer,&format); 163 if (format == ASCII_FORMAT_INFO || format == ASCII_FORMAT_INFO_DETAILED) { 164 fprintf(fd," block size is %d\n",bs); 165 } 166 else if (format == ASCII_FORMAT_MATLAB) { 167 SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported"); 168 } 169 else if (format == ASCII_FORMAT_COMMON) { 170 for ( i=0; i<a->mbs; i++ ) { 171 for ( j=0; j<bs; j++ ) { 172 fprintf(fd,"row %d:",i*bs+j); 173 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 174 for ( l=0; l<bs; l++ ) { 175 #if defined(PETSC_COMPLEX) 176 if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 177 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 178 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 179 else if (real(a->a[bs2*k + l*bs + j]) != 0.0) 180 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 181 #else 182 if (a->a[bs2*k + l*bs + j] != 0.0) 183 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 184 #endif 185 } 186 } 187 fprintf(fd,"\n"); 188 } 189 } 190 } 191 else { 192 for ( i=0; i<a->mbs; i++ ) { 193 for ( j=0; j<bs; j++ ) { 194 fprintf(fd,"row %d:",i*bs+j); 195 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 196 for ( l=0; l<bs; l++ ) { 197 #if defined(PETSC_COMPLEX) 198 if (imag(a->a[bs2*k + l*bs + j]) != 0.0) { 199 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 200 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 201 } 202 else { 203 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 204 } 205 #else 206 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 207 #endif 208 } 209 } 210 fprintf(fd,"\n"); 211 } 212 } 213 } 214 fflush(fd); 215 return 0; 216 } 217 218 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer) 219 { 220 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 221 int row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2; 222 double xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r; 223 Scalar *aa; 224 Draw draw; 225 DrawButton button; 226 PetscTruth isnull; 227 228 ViewerDrawGetDraw(viewer,&draw); 229 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 230 231 xr = a->n; yr = a->m; h = yr/10.0; w = xr/10.0; 232 xr += w; yr += h; xl = -w; yl = -h; 233 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 234 /* loop over matrix elements drawing boxes */ 235 color = DRAW_BLUE; 236 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 237 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 238 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 239 x_l = a->j[j]*bs; x_r = x_l + 1.0; 240 aa = a->a + j*bs2; 241 for ( k=0; k<bs; k++ ) { 242 for ( l=0; l<bs; l++ ) { 243 if (PetscReal(*aa++) >= 0.) continue; 244 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 245 } 246 } 247 } 248 } 249 color = DRAW_CYAN; 250 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 251 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 252 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 253 x_l = a->j[j]*bs; x_r = x_l + 1.0; 254 aa = a->a + j*bs2; 255 for ( k=0; k<bs; k++ ) { 256 for ( l=0; l<bs; l++ ) { 257 if (PetscReal(*aa++) != 0.) continue; 258 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 259 } 260 } 261 } 262 } 263 264 color = DRAW_RED; 265 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 266 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 267 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 268 x_l = a->j[j]*bs; x_r = x_l + 1.0; 269 aa = a->a + j*bs2; 270 for ( k=0; k<bs; k++ ) { 271 for ( l=0; l<bs; l++ ) { 272 if (PetscReal(*aa++) <= 0.) continue; 273 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 274 } 275 } 276 } 277 } 278 279 DrawFlush(draw); 280 DrawGetPause(draw,&pause); 281 if (pause >= 0) { PetscSleep(pause); return 0;} 282 283 /* allow the matrix to zoom or shrink */ 284 ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 285 while (button != BUTTON_RIGHT) { 286 DrawClear(draw); 287 if (button == BUTTON_LEFT) scale = .5; 288 else if (button == BUTTON_CENTER) scale = 2.; 289 xl = scale*(xl + w - xc) + xc - w*scale; 290 xr = scale*(xr - w - xc) + xc + w*scale; 291 yl = scale*(yl + h - yc) + yc - h*scale; 292 yr = scale*(yr - h - yc) + yc + h*scale; 293 w *= scale; h *= scale; 294 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 295 color = DRAW_BLUE; 296 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 297 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 298 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 299 x_l = a->j[j]*bs; x_r = x_l + 1.0; 300 aa = a->a + j*bs2; 301 for ( k=0; k<bs; k++ ) { 302 for ( l=0; l<bs; l++ ) { 303 if (PetscReal(*aa++) >= 0.) continue; 304 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 305 } 306 } 307 } 308 } 309 color = DRAW_CYAN; 310 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 311 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 312 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 313 x_l = a->j[j]*bs; x_r = x_l + 1.0; 314 aa = a->a + j*bs2; 315 for ( k=0; k<bs; k++ ) { 316 for ( l=0; l<bs; l++ ) { 317 if (PetscReal(*aa++) != 0.) continue; 318 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 319 } 320 } 321 } 322 } 323 324 color = DRAW_RED; 325 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 326 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 327 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 328 x_l = a->j[j]*bs; x_r = x_l + 1.0; 329 aa = a->a + j*bs2; 330 for ( k=0; k<bs; k++ ) { 331 for ( l=0; l<bs; l++ ) { 332 if (PetscReal(*aa++) <= 0.) continue; 333 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 334 } 335 } 336 } 337 } 338 ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 339 } 340 return 0; 341 } 342 343 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 344 { 345 Mat A = (Mat) obj; 346 ViewerType vtype; 347 int ierr; 348 349 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 350 if (vtype == MATLAB_VIEWER) { 351 SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported"); 352 } 353 else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 354 return MatView_SeqBAIJ_ASCII(A,viewer); 355 } 356 else if (vtype == BINARY_FILE_VIEWER) { 357 return MatView_SeqBAIJ_Binary(A,viewer); 358 } 359 else if (vtype == DRAW_VIEWER) { 360 return MatView_SeqBAIJ_Draw(A,viewer); 361 } 362 return 0; 363 } 364 365 #define CHUNKSIZE 10 366 367 /* This version has row oriented v */ 368 static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 369 { 370 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 371 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 372 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 373 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 374 int ridx,cidx,bs2=a->bs2; 375 Scalar *ap,value,*aa=a->a,*bap; 376 377 for ( k=0; k<m; k++ ) { /* loop over added rows */ 378 row = im[k]; brow = row/bs; 379 if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row"); 380 if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large"); 381 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 382 rmax = imax[brow]; nrow = ailen[brow]; 383 low = 0; 384 for ( l=0; l<n; l++ ) { /* loop over added columns */ 385 if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column"); 386 if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large"); 387 col = in[l]; bcol = col/bs; 388 ridx = row % bs; cidx = col % bs; 389 if (roworiented) { 390 value = *v++; 391 } 392 else { 393 value = v[k + l*m]; 394 } 395 if (!sorted) low = 0; high = nrow; 396 while (high-low > 5) { 397 t = (low+high)/2; 398 if (rp[t] > bcol) high = t; 399 else low = t; 400 } 401 for ( i=low; i<high; i++ ) { 402 if (rp[i] > bcol) break; 403 if (rp[i] == bcol) { 404 bap = ap + bs2*i + bs*cidx + ridx; 405 if (is == ADD_VALUES) *bap += value; 406 else *bap = value; 407 goto noinsert; 408 } 409 } 410 if (nonew) goto noinsert; 411 if (nrow >= rmax) { 412 /* there is no extra room in row, therefore enlarge */ 413 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 414 Scalar *new_a; 415 416 /* malloc new storage space */ 417 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 418 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 419 new_j = (int *) (new_a + bs2*new_nz); 420 new_i = new_j + new_nz; 421 422 /* copy over old data into new slots */ 423 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 424 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 425 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); 426 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 427 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, 428 len*sizeof(int)); 429 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); 430 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 431 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), 432 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); 433 /* free up old matrix storage */ 434 PetscFree(a->a); 435 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 436 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 437 a->singlemalloc = 1; 438 439 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 440 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 441 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 442 a->maxnz += bs2*CHUNKSIZE; 443 a->reallocs++; 444 a->nz++; 445 } 446 N = nrow++ - 1; 447 /* shift up all the later entries in this row */ 448 for ( ii=N; ii>=i; ii-- ) { 449 rp[ii+1] = rp[ii]; 450 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 451 } 452 if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 453 rp[i] = bcol; 454 ap[bs2*i + bs*cidx + ridx] = value; 455 noinsert:; 456 low = i; 457 } 458 ailen[brow] = nrow; 459 } 460 return 0; 461 } 462 463 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 464 { 465 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 466 *m = a->m; *n = a->n; 467 return 0; 468 } 469 470 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 471 { 472 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 473 *m = 0; *n = a->m; 474 return 0; 475 } 476 477 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 478 { 479 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 480 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 481 Scalar *aa,*v_i,*aa_i; 482 483 bs = a->bs; 484 ai = a->i; 485 aj = a->j; 486 aa = a->a; 487 bs2 = a->bs2; 488 489 if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range"); 490 491 bn = row/bs; /* Block number */ 492 bp = row % bs; /* Block Position */ 493 M = ai[bn+1] - ai[bn]; 494 *nz = bs*M; 495 496 if (v) { 497 *v = 0; 498 if (*nz) { 499 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 500 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 501 v_i = *v + i*bs; 502 aa_i = aa + bs2*(ai[bn] + i); 503 for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 504 } 505 } 506 } 507 508 if (idx) { 509 *idx = 0; 510 if (*nz) { 511 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 512 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 513 idx_i = *idx + i*bs; 514 itmp = bs*aj[ai[bn] + i]; 515 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 516 } 517 } 518 } 519 return 0; 520 } 521 522 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 523 { 524 if (idx) {if (*idx) PetscFree(*idx);} 525 if (v) {if (*v) PetscFree(*v);} 526 return 0; 527 } 528 529 static int MatTranspose_SeqBAIJ(Mat A,Mat *B) 530 { 531 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 532 Mat C; 533 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 534 int *rows,*cols,bs2=a->bs2; 535 Scalar *array=a->a; 536 537 if (B==PETSC_NULL && mbs!=nbs) 538 SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place"); 539 col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 540 PetscMemzero(col,(1+nbs)*sizeof(int)); 541 542 for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 543 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 544 PetscFree(col); 545 rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 546 cols = rows + bs; 547 for ( i=0; i<mbs; i++ ) { 548 cols[0] = i*bs; 549 for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 550 len = ai[i+1] - ai[i]; 551 for ( j=0; j<len; j++ ) { 552 rows[0] = (*aj++)*bs; 553 for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 554 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 555 array += bs2; 556 } 557 } 558 PetscFree(rows); 559 560 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 561 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 562 563 if (B != PETSC_NULL) { 564 *B = C; 565 } else { 566 /* This isn't really an in-place transpose */ 567 PetscFree(a->a); 568 if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 569 if (a->diag) PetscFree(a->diag); 570 if (a->ilen) PetscFree(a->ilen); 571 if (a->imax) PetscFree(a->imax); 572 if (a->solve_work) PetscFree(a->solve_work); 573 PetscFree(a); 574 PetscMemcpy(A,C,sizeof(struct _Mat)); 575 PetscHeaderDestroy(C); 576 } 577 return 0; 578 } 579 580 581 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 582 { 583 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 584 int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 585 int m = a->m,*ip, N, *ailen = a->ilen; 586 int mbs = a->mbs, bs2 = a->bs2; 587 Scalar *aa = a->a, *ap; 588 589 if (mode == MAT_FLUSH_ASSEMBLY) return 0; 590 591 for ( i=1; i<mbs; i++ ) { 592 /* move each row back by the amount of empty slots (fshift) before it*/ 593 fshift += imax[i-1] - ailen[i-1]; 594 if (fshift) { 595 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 596 N = ailen[i]; 597 for ( j=0; j<N; j++ ) { 598 ip[j-fshift] = ip[j]; 599 PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 600 } 601 } 602 ai[i] = ai[i-1] + ailen[i-1]; 603 } 604 if (mbs) { 605 fshift += imax[mbs-1] - ailen[mbs-1]; 606 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 607 } 608 /* reset ilen and imax for each row */ 609 for ( i=0; i<mbs; i++ ) { 610 ailen[i] = imax[i] = ai[i+1] - ai[i]; 611 } 612 a->nz = ai[mbs]; 613 614 /* diagonals may have moved, so kill the diagonal pointers */ 615 if (fshift && a->diag) { 616 PetscFree(a->diag); 617 PLogObjectMemory(A,-(m+1)*sizeof(int)); 618 a->diag = 0; 619 } 620 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Unneed storage space (blocks) %d used %d, rows %d, block size %d\n", fshift*bs2,a->nz*bs2,m,a->bs); 621 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Number of mallocs during MatSetValues %d\n", 622 a->reallocs); 623 return 0; 624 } 625 626 static int MatZeroEntries_SeqBAIJ(Mat A) 627 { 628 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 629 PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar)); 630 return 0; 631 } 632 633 int MatDestroy_SeqBAIJ(PetscObject obj) 634 { 635 Mat A = (Mat) obj; 636 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 637 638 #if defined(PETSC_LOG) 639 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 640 #endif 641 PetscFree(a->a); 642 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 643 if (a->diag) PetscFree(a->diag); 644 if (a->ilen) PetscFree(a->ilen); 645 if (a->imax) PetscFree(a->imax); 646 if (a->solve_work) PetscFree(a->solve_work); 647 if (a->mult_work) PetscFree(a->mult_work); 648 PetscFree(a); 649 PLogObjectDestroy(A); 650 PetscHeaderDestroy(A); 651 return 0; 652 } 653 654 static int MatSetOption_SeqBAIJ(Mat A,MatOption op) 655 { 656 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 657 if (op == MAT_ROW_ORIENTED) a->roworiented = 1; 658 else if (op == MAT_COLUMN_ORIENTED) a->roworiented = 0; 659 else if (op == MAT_COLUMNS_SORTED) a->sorted = 1; 660 else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 661 else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 662 else if (op == MAT_ROWS_SORTED || 663 op == MAT_SYMMETRIC || 664 op == MAT_STRUCTURALLY_SYMMETRIC || 665 op == MAT_YES_NEW_DIAGONALS) 666 PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 667 else if (op == MAT_NO_NEW_DIAGONALS) 668 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:MAT_NO_NEW_DIAGONALS");} 669 else 670 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");} 671 return 0; 672 } 673 674 675 /* -------------------------------------------------------*/ 676 /* Should check that shapes of vectors and matrices match */ 677 /* -------------------------------------------------------*/ 678 #include "pinclude/plapack.h" 679 680 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz) 681 { 682 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 683 register Scalar *x,*z,*v,sum; 684 int mbs=a->mbs,i,*idx,*ii,n; 685 686 VecGetArray_Fast(xx,x); 687 VecGetArray_Fast(zz,z); 688 689 idx = a->j; 690 v = a->a; 691 ii = a->i; 692 693 for ( i=0; i<mbs; i++ ) { 694 n = ii[1] - ii[0]; ii++; 695 sum = 0.0; 696 while (n--) sum += *v++ * x[*idx++]; 697 z[i] = sum; 698 } 699 VecRestoreArray_Fast(xx,x); 700 VecRestoreArray_Fast(zz,z); 701 PLogFlops(2*a->nz - a->m); 702 return 0; 703 } 704 705 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz) 706 { 707 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 708 register Scalar *x,*z,*v,*xb,sum1,sum2; 709 register Scalar x1,x2; 710 int mbs=a->mbs,i,*idx,*ii; 711 int j,n; 712 713 VecGetArray_Fast(xx,x); 714 VecGetArray_Fast(zz,z); 715 716 idx = a->j; 717 v = a->a; 718 ii = a->i; 719 720 for ( i=0; i<mbs; i++ ) { 721 n = ii[1] - ii[0]; ii++; 722 sum1 = 0.0; sum2 = 0.0; 723 for ( j=0; j<n; j++ ) { 724 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 725 sum1 += v[0]*x1 + v[2]*x2; 726 sum2 += v[1]*x1 + v[3]*x2; 727 v += 4; 728 } 729 z[0] = sum1; z[1] = sum2; 730 z += 2; 731 } 732 VecRestoreArray_Fast(xx,x); 733 VecRestoreArray_Fast(zz,z); 734 PLogFlops(4*a->nz - a->m); 735 return 0; 736 } 737 738 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz) 739 { 740 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 741 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 742 int mbs=a->mbs,i,*idx,*ii,j,n; 743 744 VecGetArray_Fast(xx,x); 745 VecGetArray_Fast(zz,z); 746 747 idx = a->j; 748 v = a->a; 749 ii = a->i; 750 751 for ( i=0; i<mbs; i++ ) { 752 n = ii[1] - ii[0]; ii++; 753 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 754 for ( j=0; j<n; j++ ) { 755 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 756 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 757 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 758 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 759 v += 9; 760 } 761 z[0] = sum1; z[1] = sum2; z[2] = sum3; 762 z += 3; 763 } 764 VecRestoreArray_Fast(xx,x); 765 VecRestoreArray_Fast(zz,z); 766 PLogFlops(18*a->nz - a->m); 767 return 0; 768 } 769 770 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz) 771 { 772 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 773 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4; 774 register Scalar x1,x2,x3,x4; 775 int mbs=a->mbs,i,*idx,*ii; 776 int j,n; 777 778 VecGetArray_Fast(xx,x); 779 VecGetArray_Fast(zz,z); 780 781 idx = a->j; 782 v = a->a; 783 ii = a->i; 784 785 for ( i=0; i<mbs; i++ ) { 786 n = ii[1] - ii[0]; ii++; 787 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 788 for ( j=0; j<n; j++ ) { 789 xb = x + 4*(*idx++); 790 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 791 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 792 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 793 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 794 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 795 v += 16; 796 } 797 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 798 z += 4; 799 } 800 VecRestoreArray_Fast(xx,x); 801 VecRestoreArray_Fast(zz,z); 802 PLogFlops(32*a->nz - a->m); 803 return 0; 804 } 805 806 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz) 807 { 808 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 809 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 810 register Scalar x1,x2,x3,x4,x5; 811 int mbs=a->mbs,i,*idx,*ii,j,n; 812 813 VecGetArray_Fast(xx,x); 814 VecGetArray_Fast(zz,z); 815 816 idx = a->j; 817 v = a->a; 818 ii = a->i; 819 820 for ( i=0; i<mbs; i++ ) { 821 n = ii[1] - ii[0]; ii++; 822 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 823 for ( j=0; j<n; j++ ) { 824 xb = x + 5*(*idx++); 825 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 826 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 827 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 828 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 829 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 830 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 831 v += 25; 832 } 833 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 834 z += 5; 835 } 836 VecRestoreArray_Fast(xx,x); 837 VecRestoreArray_Fast(zz,z); 838 PLogFlops(50*a->nz - a->m); 839 return 0; 840 } 841 842 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz) 843 { 844 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 845 register Scalar *x,*z,*v,*xb; 846 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2; 847 int _One = 1,ncols,k; 848 Scalar _DOne = 1.0,*work,*workt,_DZero = 0.0; 849 850 VecGetArray_Fast(xx,x); 851 VecGetArray_Fast(zz,z); 852 853 idx = a->j; 854 v = a->a; 855 ii = a->i; 856 857 858 if (!a->mult_work) { 859 k = PetscMax(a->m,a->n); 860 a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work); 861 } 862 work = a->mult_work; 863 for ( i=0; i<mbs; i++ ) { 864 n = ii[1] - ii[0]; ii++; 865 ncols = n*bs; 866 workt = work; 867 for ( j=0; j<n; j++ ) { 868 xb = x + bs*(*idx++); 869 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 870 workt += bs; 871 } 872 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); 873 v += n*bs2; 874 z += bs; 875 } 876 VecRestoreArray_Fast(xx,x); 877 VecRestoreArray_Fast(zz,z); 878 PLogFlops(2*a->nz*bs2 - a->m); 879 return 0; 880 } 881 882 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 883 { 884 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 885 register Scalar *x,*y,*z,*v,sum; 886 int mbs=a->mbs,i,*idx,*ii,n; 887 888 VecGetArray_Fast(xx,x); 889 VecGetArray_Fast(yy,y); 890 VecGetArray_Fast(zz,z); 891 892 idx = a->j; 893 v = a->a; 894 ii = a->i; 895 896 for ( i=0; i<mbs; i++ ) { 897 n = ii[1] - ii[0]; ii++; 898 sum = y[i]; 899 while (n--) sum += *v++ * x[*idx++]; 900 z[i] = sum; 901 } 902 VecRestoreArray_Fast(xx,x); 903 VecRestoreArray_Fast(yy,y); 904 VecRestoreArray_Fast(zz,z); 905 PLogFlops(2*a->nz); 906 return 0; 907 } 908 909 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 910 { 911 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 912 register Scalar *x,*y,*z,*v,*xb,sum1,sum2; 913 register Scalar x1,x2; 914 int mbs=a->mbs,i,*idx,*ii; 915 int j,n; 916 917 VecGetArray_Fast(xx,x); 918 VecGetArray_Fast(yy,y); 919 VecGetArray_Fast(zz,z); 920 921 idx = a->j; 922 v = a->a; 923 ii = a->i; 924 925 for ( i=0; i<mbs; i++ ) { 926 n = ii[1] - ii[0]; ii++; 927 sum1 = y[0]; sum2 = y[1]; 928 for ( j=0; j<n; j++ ) { 929 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 930 sum1 += v[0]*x1 + v[2]*x2; 931 sum2 += v[1]*x1 + v[3]*x2; 932 v += 4; 933 } 934 z[0] = sum1; z[1] = sum2; 935 z += 2; y += 2; 936 } 937 VecRestoreArray_Fast(xx,x); 938 VecRestoreArray_Fast(yy,y); 939 VecRestoreArray_Fast(zz,z); 940 PLogFlops(4*a->nz); 941 return 0; 942 } 943 944 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 945 { 946 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 947 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 948 int mbs=a->mbs,i,*idx,*ii,j,n; 949 950 VecGetArray_Fast(xx,x); 951 VecGetArray_Fast(yy,y); 952 VecGetArray_Fast(zz,z); 953 954 idx = a->j; 955 v = a->a; 956 ii = a->i; 957 958 for ( i=0; i<mbs; i++ ) { 959 n = ii[1] - ii[0]; ii++; 960 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; 961 for ( j=0; j<n; j++ ) { 962 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 963 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 964 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 965 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 966 v += 9; 967 } 968 z[0] = sum1; z[1] = sum2; z[2] = sum3; 969 z += 3; y += 3; 970 } 971 VecRestoreArray_Fast(xx,x); 972 VecRestoreArray_Fast(yy,y); 973 VecRestoreArray_Fast(zz,z); 974 PLogFlops(18*a->nz); 975 return 0; 976 } 977 978 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 979 { 980 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 981 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4; 982 register Scalar x1,x2,x3,x4; 983 int mbs=a->mbs,i,*idx,*ii; 984 int j,n; 985 986 VecGetArray_Fast(xx,x); 987 VecGetArray_Fast(yy,y); 988 VecGetArray_Fast(zz,z); 989 990 idx = a->j; 991 v = a->a; 992 ii = a->i; 993 994 for ( i=0; i<mbs; i++ ) { 995 n = ii[1] - ii[0]; ii++; 996 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; 997 for ( j=0; j<n; j++ ) { 998 xb = x + 4*(*idx++); 999 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1000 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 1001 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 1002 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 1003 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 1004 v += 16; 1005 } 1006 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 1007 z += 4; y += 4; 1008 } 1009 VecRestoreArray_Fast(xx,x); 1010 VecRestoreArray_Fast(yy,y); 1011 VecRestoreArray_Fast(zz,z); 1012 PLogFlops(32*a->nz); 1013 return 0; 1014 } 1015 1016 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 1017 { 1018 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1019 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 1020 register Scalar x1,x2,x3,x4,x5; 1021 int mbs=a->mbs,i,*idx,*ii,j,n; 1022 1023 VecGetArray_Fast(xx,x); 1024 VecGetArray_Fast(yy,y); 1025 VecGetArray_Fast(zz,z); 1026 1027 idx = a->j; 1028 v = a->a; 1029 ii = a->i; 1030 1031 for ( i=0; i<mbs; i++ ) { 1032 n = ii[1] - ii[0]; ii++; 1033 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; 1034 for ( j=0; j<n; j++ ) { 1035 xb = x + 5*(*idx++); 1036 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 1037 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1038 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1039 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1040 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1041 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 1042 v += 25; 1043 } 1044 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 1045 z += 5; y += 5; 1046 } 1047 VecRestoreArray_Fast(xx,x); 1048 VecRestoreArray_Fast(yy,y); 1049 VecRestoreArray_Fast(zz,z); 1050 PLogFlops(50*a->nz); 1051 return 0; 1052 } 1053 1054 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 1055 { 1056 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1057 register Scalar *x,*z,*v,*xb; 1058 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr; 1059 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1060 1061 if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1062 1063 VecGetArray_Fast(xx,x); 1064 VecGetArray_Fast(zz,z); 1065 1066 idx = a->j; 1067 v = a->a; 1068 ii = a->i; 1069 1070 1071 if (!a->mult_work) { 1072 k = PetscMax(a->m,a->n); 1073 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work); 1074 } 1075 work = a->mult_work; 1076 for ( i=0; i<mbs; i++ ) { 1077 n = ii[1] - ii[0]; ii++; 1078 ncols = n*bs; 1079 workt = work; 1080 for ( j=0; j<n; j++ ) { 1081 xb = x + bs*(*idx++); 1082 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1083 workt += bs; 1084 } 1085 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); 1086 v += n*bs2; 1087 z += bs; 1088 } 1089 VecRestoreArray_Fast(xx,x); 1090 VecRestoreArray_Fast(zz,z); 1091 PLogFlops(2*a->nz*bs2 ); 1092 return 0; 1093 } 1094 1095 static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz) 1096 { 1097 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1098 Scalar *xg,*zg,*zb; 1099 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1100 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1101 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1102 1103 1104 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 1105 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 1106 PetscMemzero(z,N*sizeof(Scalar)); 1107 1108 idx = a->j; 1109 v = a->a; 1110 ii = a->i; 1111 1112 switch (bs) { 1113 case 1: 1114 for ( i=0; i<mbs; i++ ) { 1115 n = ii[1] - ii[0]; ii++; 1116 xb = x + i; x1 = xb[0]; 1117 ib = idx + ai[i]; 1118 for ( j=0; j<n; j++ ) { 1119 rval = ib[j]; 1120 z[rval] += *v++ * x1; 1121 } 1122 } 1123 break; 1124 case 2: 1125 for ( i=0; i<mbs; i++ ) { 1126 n = ii[1] - ii[0]; ii++; 1127 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1128 ib = idx + ai[i]; 1129 for ( j=0; j<n; j++ ) { 1130 rval = ib[j]*2; 1131 z[rval++] += v[0]*x1 + v[1]*x2; 1132 z[rval++] += v[2]*x1 + v[3]*x2; 1133 v += 4; 1134 } 1135 } 1136 break; 1137 case 3: 1138 for ( i=0; i<mbs; i++ ) { 1139 n = ii[1] - ii[0]; ii++; 1140 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1141 ib = idx + ai[i]; 1142 for ( j=0; j<n; j++ ) { 1143 rval = ib[j]*3; 1144 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1145 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1146 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1147 v += 9; 1148 } 1149 } 1150 break; 1151 case 4: 1152 for ( i=0; i<mbs; i++ ) { 1153 n = ii[1] - ii[0]; ii++; 1154 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1155 ib = idx + ai[i]; 1156 for ( j=0; j<n; j++ ) { 1157 rval = ib[j]*4; 1158 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1159 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1160 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1161 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1162 v += 16; 1163 } 1164 } 1165 break; 1166 case 5: 1167 for ( i=0; i<mbs; i++ ) { 1168 n = ii[1] - ii[0]; ii++; 1169 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1170 x4 = xb[3]; x5 = xb[4]; 1171 ib = idx + ai[i]; 1172 for ( j=0; j<n; j++ ) { 1173 rval = ib[j]*5; 1174 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1175 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1176 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1177 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1178 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1179 v += 25; 1180 } 1181 } 1182 break; 1183 /* block sizes larger then 5 by 5 are handled by BLAS */ 1184 default: { 1185 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1186 if (!a->mult_work) { 1187 k = PetscMax(a->m,a->n); 1188 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1189 CHKPTRQ(a->mult_work); 1190 } 1191 work = a->mult_work; 1192 for ( i=0; i<mbs; i++ ) { 1193 n = ii[1] - ii[0]; ii++; 1194 ncols = n*bs; 1195 PetscMemzero(work,ncols*sizeof(Scalar)); 1196 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1197 v += n*bs2; 1198 x += bs; 1199 workt = work; 1200 for ( j=0; j<n; j++ ) { 1201 zb = z + bs*(*idx++); 1202 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1203 workt += bs; 1204 } 1205 } 1206 } 1207 } 1208 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1209 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1210 PLogFlops(2*a->nz*a->bs2 - a->n); 1211 return 0; 1212 } 1213 static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1214 { 1215 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1216 Scalar *xg,*zg,*zb; 1217 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1218 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1219 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1220 1221 1222 1223 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 1224 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 1225 1226 if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1227 else PetscMemzero(z,N*sizeof(Scalar)); 1228 1229 1230 idx = a->j; 1231 v = a->a; 1232 ii = a->i; 1233 1234 switch (bs) { 1235 case 1: 1236 for ( i=0; i<mbs; i++ ) { 1237 n = ii[1] - ii[0]; ii++; 1238 xb = x + i; x1 = xb[0]; 1239 ib = idx + ai[i]; 1240 for ( j=0; j<n; j++ ) { 1241 rval = ib[j]; 1242 z[rval] += *v++ * x1; 1243 } 1244 } 1245 break; 1246 case 2: 1247 for ( i=0; i<mbs; i++ ) { 1248 n = ii[1] - ii[0]; ii++; 1249 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1250 ib = idx + ai[i]; 1251 for ( j=0; j<n; j++ ) { 1252 rval = ib[j]*2; 1253 z[rval++] += v[0]*x1 + v[1]*x2; 1254 z[rval++] += v[2]*x1 + v[3]*x2; 1255 v += 4; 1256 } 1257 } 1258 break; 1259 case 3: 1260 for ( i=0; i<mbs; i++ ) { 1261 n = ii[1] - ii[0]; ii++; 1262 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1263 ib = idx + ai[i]; 1264 for ( j=0; j<n; j++ ) { 1265 rval = ib[j]*3; 1266 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1267 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1268 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1269 v += 9; 1270 } 1271 } 1272 break; 1273 case 4: 1274 for ( i=0; i<mbs; i++ ) { 1275 n = ii[1] - ii[0]; ii++; 1276 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1277 ib = idx + ai[i]; 1278 for ( j=0; j<n; j++ ) { 1279 rval = ib[j]*4; 1280 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1281 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1282 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1283 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1284 v += 16; 1285 } 1286 } 1287 break; 1288 case 5: 1289 for ( i=0; i<mbs; i++ ) { 1290 n = ii[1] - ii[0]; ii++; 1291 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1292 x4 = xb[3]; x5 = xb[4]; 1293 ib = idx + ai[i]; 1294 for ( j=0; j<n; j++ ) { 1295 rval = ib[j]*5; 1296 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1297 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1298 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1299 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1300 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1301 v += 25; 1302 } 1303 } 1304 break; 1305 /* block sizes larger then 5 by 5 are handled by BLAS */ 1306 default: { 1307 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1308 if (!a->mult_work) { 1309 k = PetscMax(a->m,a->n); 1310 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1311 CHKPTRQ(a->mult_work); 1312 } 1313 work = a->mult_work; 1314 for ( i=0; i<mbs; i++ ) { 1315 n = ii[1] - ii[0]; ii++; 1316 ncols = n*bs; 1317 PetscMemzero(work,ncols*sizeof(Scalar)); 1318 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1319 v += n*bs2; 1320 x += bs; 1321 workt = work; 1322 for ( j=0; j<n; j++ ) { 1323 zb = z + bs*(*idx++); 1324 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1325 workt += bs; 1326 } 1327 } 1328 } 1329 } 1330 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1331 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1332 PLogFlops(2*a->nz*a->bs2); 1333 return 0; 1334 } 1335 1336 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem) 1337 { 1338 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1339 if (nz) *nz = a->bs2*a->nz; 1340 if (nza) *nza = a->maxnz; 1341 if (mem) *mem = (int)A->mem; 1342 return 0; 1343 } 1344 1345 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 1346 { 1347 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 1348 1349 if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type"); 1350 1351 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1352 if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 1353 (a->nz != b->nz)) { 1354 *flg = PETSC_FALSE; return 0; 1355 } 1356 1357 /* if the a->i are the same */ 1358 if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 1359 *flg = PETSC_FALSE; return 0; 1360 } 1361 1362 /* if a->j are the same */ 1363 if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 1364 *flg = PETSC_FALSE; return 0; 1365 } 1366 1367 /* if a->a are the same */ 1368 if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 1369 *flg = PETSC_FALSE; return 0; 1370 } 1371 *flg = PETSC_TRUE; 1372 return 0; 1373 1374 } 1375 1376 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 1377 { 1378 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1379 int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 1380 Scalar *x, zero = 0.0,*aa,*aa_j; 1381 1382 bs = a->bs; 1383 aa = a->a; 1384 ai = a->i; 1385 aj = a->j; 1386 ambs = a->mbs; 1387 bs2 = a->bs2; 1388 1389 VecSet(&zero,v); 1390 VecGetArray(v,&x); VecGetLocalSize(v,&n); 1391 if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector"); 1392 for ( i=0; i<ambs; i++ ) { 1393 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1394 if (aj[j] == i) { 1395 row = i*bs; 1396 aa_j = aa+j*bs2; 1397 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 1398 break; 1399 } 1400 } 1401 } 1402 return 0; 1403 } 1404 1405 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 1406 { 1407 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1408 Scalar *l,*r,x,*v,*aa,*li,*ri; 1409 int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 1410 1411 ai = a->i; 1412 aj = a->j; 1413 aa = a->a; 1414 m = a->m; 1415 n = a->n; 1416 bs = a->bs; 1417 mbs = a->mbs; 1418 bs2 = a->bs2; 1419 if (ll) { 1420 VecGetArray(ll,&l); VecGetSize(ll,&lm); 1421 if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length"); 1422 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1423 M = ai[i+1] - ai[i]; 1424 li = l + i*bs; 1425 v = aa + bs2*ai[i]; 1426 for ( j=0; j<M; j++ ) { /* for each block */ 1427 for ( k=0; k<bs2; k++ ) { 1428 (*v++) *= li[k%bs]; 1429 } 1430 } 1431 } 1432 } 1433 1434 if (rr) { 1435 VecGetArray(rr,&r); VecGetSize(rr,&rn); 1436 if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length"); 1437 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1438 M = ai[i+1] - ai[i]; 1439 v = aa + bs2*ai[i]; 1440 for ( j=0; j<M; j++ ) { /* for each block */ 1441 ri = r + bs*aj[ai[i]+j]; 1442 for ( k=0; k<bs; k++ ) { 1443 x = ri[k]; 1444 for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 1445 } 1446 } 1447 } 1448 } 1449 return 0; 1450 } 1451 1452 1453 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 1454 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 1455 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1456 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*); 1457 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**); 1458 1459 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 1460 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 1461 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 1462 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 1463 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 1464 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 1465 1466 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 1467 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 1468 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 1469 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 1470 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 1471 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 1472 1473 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 1474 { 1475 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1476 Scalar *v = a->a; 1477 double sum = 0.0; 1478 int i,nz=a->nz,bs2=a->bs2; 1479 1480 if (type == NORM_FROBENIUS) { 1481 for (i=0; i< bs2*nz; i++ ) { 1482 #if defined(PETSC_COMPLEX) 1483 sum += real(conj(*v)*(*v)); v++; 1484 #else 1485 sum += (*v)*(*v); v++; 1486 #endif 1487 } 1488 *norm = sqrt(sum); 1489 } 1490 else { 1491 SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet"); 1492 } 1493 return 0; 1494 } 1495 1496 /* 1497 note: This can only work for identity for row and col. It would 1498 be good to check this and otherwise generate an error. 1499 */ 1500 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 1501 { 1502 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1503 Mat outA; 1504 int ierr; 1505 1506 if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported"); 1507 1508 outA = inA; 1509 inA->factor = FACTOR_LU; 1510 a->row = row; 1511 a->col = col; 1512 1513 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1514 1515 if (!a->diag) { 1516 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 1517 } 1518 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 1519 return 0; 1520 } 1521 1522 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 1523 { 1524 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1525 int one = 1, totalnz = a->bs2*a->nz; 1526 BLscal_( &totalnz, alpha, a->a, &one ); 1527 PLogFlops(totalnz); 1528 return 0; 1529 } 1530 1531 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 1532 { 1533 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1534 int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 1535 int *ai = a->i, *ailen = a->ilen; 1536 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 1537 Scalar *ap, *aa = a->a, zero = 0.0; 1538 1539 for ( k=0; k<m; k++ ) { /* loop over rows */ 1540 row = im[k]; brow = row/bs; 1541 if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row"); 1542 if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large"); 1543 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1544 nrow = ailen[brow]; 1545 for ( l=0; l<n; l++ ) { /* loop over columns */ 1546 if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column"); 1547 if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large"); 1548 col = in[l] ; 1549 bcol = col/bs; 1550 cidx = col%bs; 1551 ridx = row%bs; 1552 high = nrow; 1553 low = 0; /* assume unsorted */ 1554 while (high-low > 5) { 1555 t = (low+high)/2; 1556 if (rp[t] > bcol) high = t; 1557 else low = t; 1558 } 1559 for ( i=low; i<high; i++ ) { 1560 if (rp[i] > bcol) break; 1561 if (rp[i] == bcol) { 1562 *v++ = ap[bs2*i+bs*cidx+ridx]; 1563 goto finished; 1564 } 1565 } 1566 *v++ = zero; 1567 finished:; 1568 } 1569 } 1570 return 0; 1571 } 1572 1573 static int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs) 1574 { 1575 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data; 1576 *bs = baij->bs; 1577 return 0; 1578 } 1579 1580 /* idx should be of length atlease bs */ 1581 static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg) 1582 { 1583 int i,row; 1584 row = idx[0]; 1585 if (row%bs!=0) { *flg = PETSC_FALSE; return 0; } 1586 1587 for ( i=1; i<bs; i++ ) { 1588 if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; } 1589 } 1590 *flg = PETSC_TRUE; 1591 return 0; 1592 } 1593 1594 static int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag) 1595 { 1596 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1597 IS is_local; 1598 int ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2; 1599 PetscTruth flg; 1600 Scalar zero = 0.0,*aa; 1601 1602 /* Make a copy of the IS and sort it */ 1603 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 1604 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1605 ierr = ISCreateSeq(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr); 1606 ierr = ISSort(is_local); CHKERRQ(ierr); 1607 ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr); 1608 1609 i = 0; 1610 while (i < is_n) { 1611 if (rows[i]<0 || rows[i]>m) SETERRQ(1,"MatZeroRows_SeqBAIJ:row out of range"); 1612 flg = PETSC_FALSE; 1613 if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); } 1614 if (flg) { /* There exists a block of rows to be Zerowed */ 1615 baij->ilen[rows[i]/bs] = 0; 1616 i += bs; 1617 } else { /* Zero out only the requested row */ 1618 count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs; 1619 aa = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs); 1620 for ( j=0; j<count; j++ ) { 1621 aa[0] = zero; 1622 aa+=bs; 1623 } 1624 i++; 1625 } 1626 } 1627 if (diag) { 1628 for ( j=0; j<is_n; j++ ) { 1629 ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1630 } 1631 } 1632 ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr); 1633 ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr); 1634 ierr = ISDestroy(is_local); CHKERRQ(ierr); 1635 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1636 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1637 1638 return 0; 1639 } 1640 int MatPrintHelp_SeqBAIJ(Mat A) 1641 { 1642 static int called = 0; 1643 MPI_Comm comm = A->comm; 1644 1645 if (called) return 0; else called = 1; 1646 PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n"); 1647 PetscPrintf(comm," -mat_block_size <block_size>\n"); 1648 return 0; 1649 } 1650 1651 /* -------------------------------------------------------------------*/ 1652 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 1653 MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 1654 MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N, 1655 MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ, 1656 MatSolve_SeqBAIJ_N,0, 1657 0,0, 1658 MatLUFactor_SeqBAIJ,0, 1659 0, 1660 MatTranspose_SeqBAIJ, 1661 MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 1662 MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 1663 0,MatAssemblyEnd_SeqBAIJ, 1664 0, 1665 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ, 1666 MatGetReordering_SeqBAIJ, 1667 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 1668 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 1669 MatILUFactorSymbolic_SeqBAIJ,0, 1670 0,0,/* MatConvert_SeqBAIJ */ 0, 1671 MatConvertSameType_SeqBAIJ,0,0, 1672 MatILUFactor_SeqBAIJ,0,0, 1673 MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ, 1674 MatGetValues_SeqBAIJ,0, 1675 MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 1676 0,0,0,MatGetBlockSize_SeqBAIJ}; 1677 1678 /*@C 1679 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1680 compressed row) format. For good matrix assembly performance the 1681 user should preallocate the matrix storage by setting the parameter nz 1682 (or the array nzz). By setting these parameters accurately, performance 1683 during matrix assembly can be increased by more than a factor of 50. 1684 1685 Input Parameters: 1686 . comm - MPI communicator, set to MPI_COMM_SELF 1687 . bs - size of block 1688 . m - number of rows 1689 . n - number of columns 1690 . nz - number of block nonzeros per block row (same for all rows) 1691 . nzz - array containing the number of block nonzeros in the various block rows 1692 (possibly different for each block row) or PETSC_NULL 1693 1694 Output Parameter: 1695 . A - the matrix 1696 1697 Notes: 1698 The block AIJ format is fully compatible with standard Fortran 77 1699 storage. That is, the stored row and column indices can begin at 1700 either one (as in Fortran) or zero. See the users' manual for details. 1701 1702 Specify the preallocated storage with either nz or nnz (not both). 1703 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1704 allocation. For additional details, see the users manual chapter on 1705 matrices and the file $(PETSC_DIR)/Performance. 1706 1707 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1708 @*/ 1709 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 1710 { 1711 Mat B; 1712 Mat_SeqBAIJ *b; 1713 int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs; 1714 1715 if (mbs*bs!=m || nbs*bs!=n) 1716 SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize"); 1717 1718 *A = 0; 1719 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 1720 PLogObjectCreate(B); 1721 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 1722 PetscMemzero(b,sizeof(Mat_SeqBAIJ)); 1723 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 1724 ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr); 1725 if (!flg) { 1726 switch (bs) { 1727 case 1: 1728 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1729 B->ops.solve = MatSolve_SeqBAIJ_1; 1730 B->ops.mult = MatMult_SeqBAIJ_1; 1731 B->ops.multadd = MatMultAdd_SeqBAIJ_1; 1732 break; 1733 case 2: 1734 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1735 B->ops.solve = MatSolve_SeqBAIJ_2; 1736 B->ops.mult = MatMult_SeqBAIJ_2; 1737 B->ops.multadd = MatMultAdd_SeqBAIJ_2; 1738 break; 1739 case 3: 1740 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1741 B->ops.solve = MatSolve_SeqBAIJ_3; 1742 B->ops.mult = MatMult_SeqBAIJ_3; 1743 B->ops.multadd = MatMultAdd_SeqBAIJ_3; 1744 break; 1745 case 4: 1746 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1747 B->ops.solve = MatSolve_SeqBAIJ_4; 1748 B->ops.mult = MatMult_SeqBAIJ_4; 1749 B->ops.multadd = MatMultAdd_SeqBAIJ_4; 1750 break; 1751 case 5: 1752 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1753 B->ops.solve = MatSolve_SeqBAIJ_5; 1754 B->ops.mult = MatMult_SeqBAIJ_5; 1755 B->ops.multadd = MatMultAdd_SeqBAIJ_5; 1756 break; 1757 } 1758 } 1759 B->destroy = MatDestroy_SeqBAIJ; 1760 B->view = MatView_SeqBAIJ; 1761 B->factor = 0; 1762 B->lupivotthreshold = 1.0; 1763 b->row = 0; 1764 b->col = 0; 1765 b->reallocs = 0; 1766 1767 b->m = m; B->m = m; B->M = m; 1768 b->n = n; B->n = n; B->N = n; 1769 b->mbs = mbs; 1770 b->nbs = nbs; 1771 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 1772 if (nnz == PETSC_NULL) { 1773 if (nz == PETSC_DEFAULT) nz = 5; 1774 else if (nz <= 0) nz = 1; 1775 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 1776 nz = nz*mbs; 1777 } 1778 else { 1779 nz = 0; 1780 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 1781 } 1782 1783 /* allocate the matrix space */ 1784 len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 1785 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 1786 PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 1787 b->j = (int *) (b->a + nz*bs2); 1788 PetscMemzero(b->j,nz*sizeof(int)); 1789 b->i = b->j + nz; 1790 b->singlemalloc = 1; 1791 1792 b->i[0] = 0; 1793 for (i=1; i<mbs+1; i++) { 1794 b->i[i] = b->i[i-1] + b->imax[i-1]; 1795 } 1796 1797 /* b->ilen will count nonzeros in each block row so far. */ 1798 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 1799 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1800 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 1801 1802 b->bs = bs; 1803 b->bs2 = bs2; 1804 b->mbs = mbs; 1805 b->nz = 0; 1806 b->maxnz = nz*bs2; 1807 b->sorted = 0; 1808 b->roworiented = 1; 1809 b->nonew = 0; 1810 b->diag = 0; 1811 b->solve_work = 0; 1812 b->mult_work = 0; 1813 b->spptr = 0; 1814 *A = B; 1815 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 1816 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 1817 return 0; 1818 } 1819 1820 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 1821 { 1822 Mat C; 1823 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 1824 int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1825 1826 if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix"); 1827 1828 *B = 0; 1829 PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 1830 PLogObjectCreate(C); 1831 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 1832 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 1833 C->destroy = MatDestroy_SeqBAIJ; 1834 C->view = MatView_SeqBAIJ; 1835 C->factor = A->factor; 1836 c->row = 0; 1837 c->col = 0; 1838 C->assembled = PETSC_TRUE; 1839 1840 c->m = C->m = a->m; 1841 c->n = C->n = a->n; 1842 C->M = a->m; 1843 C->N = a->n; 1844 1845 c->bs = a->bs; 1846 c->bs2 = a->bs2; 1847 c->mbs = a->mbs; 1848 c->nbs = a->nbs; 1849 1850 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 1851 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 1852 for ( i=0; i<mbs; i++ ) { 1853 c->imax[i] = a->imax[i]; 1854 c->ilen[i] = a->ilen[i]; 1855 } 1856 1857 /* allocate the matrix space */ 1858 c->singlemalloc = 1; 1859 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 1860 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 1861 c->j = (int *) (c->a + nz*bs2); 1862 c->i = c->j + nz; 1863 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 1864 if (mbs > 0) { 1865 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 1866 if (cpvalues == COPY_VALUES) { 1867 PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 1868 } 1869 } 1870 1871 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1872 c->sorted = a->sorted; 1873 c->roworiented = a->roworiented; 1874 c->nonew = a->nonew; 1875 1876 if (a->diag) { 1877 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 1878 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 1879 for ( i=0; i<mbs; i++ ) { 1880 c->diag[i] = a->diag[i]; 1881 } 1882 } 1883 else c->diag = 0; 1884 c->nz = a->nz; 1885 c->maxnz = a->maxnz; 1886 c->solve_work = 0; 1887 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1888 c->mult_work = 0; 1889 *B = C; 1890 return 0; 1891 } 1892 1893 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 1894 { 1895 Mat_SeqBAIJ *a; 1896 Mat B; 1897 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 1898 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1899 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1900 int *masked, nmask,tmp,bs2,ishift; 1901 Scalar *aa; 1902 MPI_Comm comm = ((PetscObject) viewer)->comm; 1903 1904 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 1905 bs2 = bs*bs; 1906 1907 MPI_Comm_size(comm,&size); 1908 if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor"); 1909 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1910 ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 1911 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object"); 1912 M = header[1]; N = header[2]; nz = header[3]; 1913 1914 if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 1915 1916 /* 1917 This code adds extra rows to make sure the number of rows is 1918 divisible by the blocksize 1919 */ 1920 mbs = M/bs; 1921 extra_rows = bs - M + bs*(mbs); 1922 if (extra_rows == bs) extra_rows = 0; 1923 else mbs++; 1924 if (extra_rows) { 1925 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize"); 1926 } 1927 1928 /* read in row lengths */ 1929 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 1930 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 1931 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 1932 1933 /* read in column indices */ 1934 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 1935 ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 1936 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 1937 1938 /* loop over row lengths determining block row lengths */ 1939 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 1940 PetscMemzero(browlengths,mbs*sizeof(int)); 1941 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 1942 PetscMemzero(mask,mbs*sizeof(int)); 1943 masked = mask + mbs; 1944 rowcount = 0; nzcount = 0; 1945 for ( i=0; i<mbs; i++ ) { 1946 nmask = 0; 1947 for ( j=0; j<bs; j++ ) { 1948 kmax = rowlengths[rowcount]; 1949 for ( k=0; k<kmax; k++ ) { 1950 tmp = jj[nzcount++]/bs; 1951 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1952 } 1953 rowcount++; 1954 } 1955 browlengths[i] += nmask; 1956 /* zero out the mask elements we set */ 1957 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1958 } 1959 1960 /* create our matrix */ 1961 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 1962 CHKERRQ(ierr); 1963 B = *A; 1964 a = (Mat_SeqBAIJ *) B->data; 1965 1966 /* set matrix "i" values */ 1967 a->i[0] = 0; 1968 for ( i=1; i<= mbs; i++ ) { 1969 a->i[i] = a->i[i-1] + browlengths[i-1]; 1970 a->ilen[i-1] = browlengths[i-1]; 1971 } 1972 a->nz = 0; 1973 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 1974 1975 /* read in nonzero values */ 1976 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 1977 ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 1978 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 1979 1980 /* set "a" and "j" values into matrix */ 1981 nzcount = 0; jcount = 0; 1982 for ( i=0; i<mbs; i++ ) { 1983 nzcountb = nzcount; 1984 nmask = 0; 1985 for ( j=0; j<bs; j++ ) { 1986 kmax = rowlengths[i*bs+j]; 1987 for ( k=0; k<kmax; k++ ) { 1988 tmp = jj[nzcount++]/bs; 1989 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1990 } 1991 rowcount++; 1992 } 1993 /* sort the masked values */ 1994 PetscSortInt(nmask,masked); 1995 1996 /* set "j" values into matrix */ 1997 maskcount = 1; 1998 for ( j=0; j<nmask; j++ ) { 1999 a->j[jcount++] = masked[j]; 2000 mask[masked[j]] = maskcount++; 2001 } 2002 /* set "a" values into matrix */ 2003 ishift = bs2*a->i[i]; 2004 for ( j=0; j<bs; j++ ) { 2005 kmax = rowlengths[i*bs+j]; 2006 for ( k=0; k<kmax; k++ ) { 2007 tmp = jj[nzcountb]/bs ; 2008 block = mask[tmp] - 1; 2009 point = jj[nzcountb] - bs*tmp; 2010 idx = ishift + bs2*block + j + bs*point; 2011 a->a[idx] = aa[nzcountb++]; 2012 } 2013 } 2014 /* zero out the mask elements we set */ 2015 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2016 } 2017 if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix"); 2018 2019 PetscFree(rowlengths); 2020 PetscFree(browlengths); 2021 PetscFree(aa); 2022 PetscFree(jj); 2023 PetscFree(mask); 2024 2025 B->assembled = PETSC_TRUE; 2026 2027 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 2028 if (flg) { 2029 Viewer tviewer; 2030 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2031 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr); 2032 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2033 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2034 } 2035 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 2036 if (flg) { 2037 Viewer tviewer; 2038 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2039 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr); 2040 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2041 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2042 } 2043 ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 2044 if (flg) { 2045 Viewer tviewer; 2046 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2047 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2048 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2049 } 2050 ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 2051 if (flg) { 2052 Viewer tviewer; 2053 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2054 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr); 2055 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2056 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2057 } 2058 ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 2059 if (flg) { 2060 Viewer tviewer; 2061 ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 2062 ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 2063 ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 2064 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2065 } 2066 return 0; 2067 } 2068 2069 2070 2071