1 #ifndef lint 2 static char vcid[] = "$Id: baij.c,v 1.65 1996/08/08 14:43:31 bsmith Exp bsmith $"; 3 #endif 4 5 /* 6 Defines the basic matrix operations for the BAIJ (compressed row) 7 matrix storage format. 8 */ 9 #include "src/mat/impls/baij/seq/baij.h" 10 #include "src/vec/vecimpl.h" 11 #include "src/inline/spops.h" 12 #include "petsc.h" 13 14 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 15 16 static int MatGetReordering_SeqBAIJ(Mat A,MatReordering type,IS *rperm,IS *cperm) 17 { 18 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 19 int ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift; 20 21 /* 22 this is tacky: In the future when we have written special factorization 23 and solve routines for the identity permutation we should use a 24 stride index set instead of the general one. 25 */ 26 if (type == ORDER_NATURAL) { 27 idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx); 28 for ( i=0; i<n; i++ ) idx[i] = i; 29 ierr = ISCreateGeneral(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr); 30 ierr = ISCreateGeneral(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr); 31 PetscFree(idx); 32 ISSetPermutation(*rperm); 33 ISSetPermutation(*cperm); 34 ISSetIdentity(*rperm); 35 ISSetIdentity(*cperm); 36 return 0; 37 } 38 39 MatReorderingRegisterAll(); 40 ishift = 0; 41 oshift = -MatReorderingIndexShift[(int)type]; 42 if (MatReorderingRequiresSymmetric[(int)type]) { 43 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,ishift,oshift,&ia,&ja);CHKERRQ(ierr); 44 ierr = MatGetReordering_IJ(n,ia,ja,type,rperm,cperm); CHKERRQ(ierr); 45 PetscFree(ia); PetscFree(ja); 46 } else { 47 if (ishift == oshift) { 48 ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 49 } 50 else if (ishift == -1) { 51 /* temporarily subtract 1 from i and j indices */ 52 int nz = a->i[n] - 1; 53 for ( i=0; i<nz; i++ ) a->j[i]--; 54 for ( i=0; i<n+1; i++ ) a->i[i]--; 55 ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 56 for ( i=0; i<nz; i++ ) a->j[i]++; 57 for ( i=0; i<n+1; i++ ) a->i[i]++; 58 } else { 59 /* temporarily add 1 to i and j indices */ 60 int nz = a->i[n] - 1; 61 for ( i=0; i<nz; i++ ) a->j[i]++; 62 for ( i=0; i<n+1; i++ ) a->i[i]++; 63 ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 64 for ( i=0; i<nz; i++ ) a->j[i]--; 65 for ( i=0; i<n+1; i++ ) a->i[i]--; 66 } 67 } 68 return 0; 69 } 70 71 /* 72 Adds diagonal pointers to sparse matrix structure. 73 */ 74 75 int MatMarkDiag_SeqBAIJ(Mat A) 76 { 77 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 78 int i,j, *diag, m = a->mbs; 79 80 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 81 PLogObjectMemory(A,(m+1)*sizeof(int)); 82 for ( i=0; i<m; i++ ) { 83 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 84 if (a->j[j] == i) { 85 diag[i] = j; 86 break; 87 } 88 } 89 } 90 a->diag = diag; 91 return 0; 92 } 93 94 #include "draw.h" 95 #include "pinclude/pviewer.h" 96 #include "sys.h" 97 98 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 99 { 100 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 101 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 102 Scalar *aa; 103 104 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 105 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 106 col_lens[0] = MAT_COOKIE; 107 col_lens[1] = a->m; 108 col_lens[2] = a->n; 109 col_lens[3] = a->nz*bs2; 110 111 /* store lengths of each row and write (including header) to file */ 112 count = 0; 113 for ( i=0; i<a->mbs; i++ ) { 114 for ( j=0; j<bs; j++ ) { 115 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 116 } 117 } 118 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 119 PetscFree(col_lens); 120 121 /* store column indices (zero start index) */ 122 jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj); 123 count = 0; 124 for ( i=0; i<a->mbs; i++ ) { 125 for ( j=0; j<bs; j++ ) { 126 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 127 for ( l=0; l<bs; l++ ) { 128 jj[count++] = bs*a->j[k] + l; 129 } 130 } 131 } 132 } 133 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr); 134 PetscFree(jj); 135 136 /* store nonzero values */ 137 aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa); 138 count = 0; 139 for ( i=0; i<a->mbs; i++ ) { 140 for ( j=0; j<bs; j++ ) { 141 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 142 for ( l=0; l<bs; l++ ) { 143 aa[count++] = a->a[bs2*k + l*bs + j]; 144 } 145 } 146 } 147 } 148 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 149 PetscFree(aa); 150 return 0; 151 } 152 153 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 154 { 155 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 156 int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 157 FILE *fd; 158 char *outputname; 159 160 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 161 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 162 ierr = ViewerGetFormat(viewer,&format); 163 if (format == ASCII_FORMAT_INFO || format == ASCII_FORMAT_INFO_DETAILED) { 164 fprintf(fd," block size is %d\n",bs); 165 } 166 else if (format == ASCII_FORMAT_MATLAB) { 167 SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported"); 168 } 169 else if (format == ASCII_FORMAT_COMMON) { 170 for ( i=0; i<a->mbs; i++ ) { 171 for ( j=0; j<bs; j++ ) { 172 fprintf(fd,"row %d:",i*bs+j); 173 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 174 for ( l=0; l<bs; l++ ) { 175 #if defined(PETSC_COMPLEX) 176 if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 177 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 178 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 179 else if (real(a->a[bs2*k + l*bs + j]) != 0.0) 180 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 181 #else 182 if (a->a[bs2*k + l*bs + j] != 0.0) 183 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 184 #endif 185 } 186 } 187 fprintf(fd,"\n"); 188 } 189 } 190 } 191 else { 192 for ( i=0; i<a->mbs; i++ ) { 193 for ( j=0; j<bs; j++ ) { 194 fprintf(fd,"row %d:",i*bs+j); 195 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 196 for ( l=0; l<bs; l++ ) { 197 #if defined(PETSC_COMPLEX) 198 if (imag(a->a[bs2*k + l*bs + j]) != 0.0) { 199 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 200 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 201 } 202 else { 203 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 204 } 205 #else 206 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 207 #endif 208 } 209 } 210 fprintf(fd,"\n"); 211 } 212 } 213 } 214 fflush(fd); 215 return 0; 216 } 217 218 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer) 219 { 220 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 221 int row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2; 222 double xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r; 223 Scalar *aa; 224 Draw draw; 225 DrawButton button; 226 PetscTruth isnull; 227 228 ViewerDrawGetDraw(viewer,&draw); 229 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 230 231 xr = a->n; yr = a->m; h = yr/10.0; w = xr/10.0; 232 xr += w; yr += h; xl = -w; yl = -h; 233 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 234 /* loop over matrix elements drawing boxes */ 235 color = DRAW_BLUE; 236 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 237 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 238 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 239 x_l = a->j[j]*bs; x_r = x_l + 1.0; 240 aa = a->a + j*bs2; 241 for ( k=0; k<bs; k++ ) { 242 for ( l=0; l<bs; l++ ) { 243 if (PetscReal(*aa++) >= 0.) continue; 244 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 245 } 246 } 247 } 248 } 249 color = DRAW_CYAN; 250 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 251 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 252 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 253 x_l = a->j[j]*bs; x_r = x_l + 1.0; 254 aa = a->a + j*bs2; 255 for ( k=0; k<bs; k++ ) { 256 for ( l=0; l<bs; l++ ) { 257 if (PetscReal(*aa++) != 0.) continue; 258 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 259 } 260 } 261 } 262 } 263 264 color = DRAW_RED; 265 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 266 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 267 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 268 x_l = a->j[j]*bs; x_r = x_l + 1.0; 269 aa = a->a + j*bs2; 270 for ( k=0; k<bs; k++ ) { 271 for ( l=0; l<bs; l++ ) { 272 if (PetscReal(*aa++) <= 0.) continue; 273 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 274 } 275 } 276 } 277 } 278 279 DrawFlush(draw); 280 DrawGetPause(draw,&pause); 281 if (pause >= 0) { PetscSleep(pause); return 0;} 282 283 /* allow the matrix to zoom or shrink */ 284 ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 285 while (button != BUTTON_RIGHT) { 286 DrawClear(draw); 287 if (button == BUTTON_LEFT) scale = .5; 288 else if (button == BUTTON_CENTER) scale = 2.; 289 xl = scale*(xl + w - xc) + xc - w*scale; 290 xr = scale*(xr - w - xc) + xc + w*scale; 291 yl = scale*(yl + h - yc) + yc - h*scale; 292 yr = scale*(yr - h - yc) + yc + h*scale; 293 w *= scale; h *= scale; 294 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 295 color = DRAW_BLUE; 296 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 297 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 298 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 299 x_l = a->j[j]*bs; x_r = x_l + 1.0; 300 aa = a->a + j*bs2; 301 for ( k=0; k<bs; k++ ) { 302 for ( l=0; l<bs; l++ ) { 303 if (PetscReal(*aa++) >= 0.) continue; 304 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 305 } 306 } 307 } 308 } 309 color = DRAW_CYAN; 310 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 311 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 312 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 313 x_l = a->j[j]*bs; x_r = x_l + 1.0; 314 aa = a->a + j*bs2; 315 for ( k=0; k<bs; k++ ) { 316 for ( l=0; l<bs; l++ ) { 317 if (PetscReal(*aa++) != 0.) continue; 318 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 319 } 320 } 321 } 322 } 323 324 color = DRAW_RED; 325 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 326 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 327 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 328 x_l = a->j[j]*bs; x_r = x_l + 1.0; 329 aa = a->a + j*bs2; 330 for ( k=0; k<bs; k++ ) { 331 for ( l=0; l<bs; l++ ) { 332 if (PetscReal(*aa++) <= 0.) continue; 333 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 334 } 335 } 336 } 337 } 338 ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 339 } 340 return 0; 341 } 342 343 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 344 { 345 Mat A = (Mat) obj; 346 ViewerType vtype; 347 int ierr; 348 349 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 350 if (vtype == MATLAB_VIEWER) { 351 SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported"); 352 } 353 else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 354 return MatView_SeqBAIJ_ASCII(A,viewer); 355 } 356 else if (vtype == BINARY_FILE_VIEWER) { 357 return MatView_SeqBAIJ_Binary(A,viewer); 358 } 359 else if (vtype == DRAW_VIEWER) { 360 return MatView_SeqBAIJ_Draw(A,viewer); 361 } 362 return 0; 363 } 364 365 #define CHUNKSIZE 10 366 367 /* This version has row oriented v */ 368 static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 369 { 370 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 371 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 372 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 373 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 374 int ridx,cidx,bs2=a->bs2; 375 Scalar *ap,value,*aa=a->a,*bap; 376 377 for ( k=0; k<m; k++ ) { /* loop over added rows */ 378 row = im[k]; brow = row/bs; 379 if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row"); 380 if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large"); 381 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 382 rmax = imax[brow]; nrow = ailen[brow]; 383 low = 0; 384 for ( l=0; l<n; l++ ) { /* loop over added columns */ 385 if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column"); 386 if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large"); 387 col = in[l]; bcol = col/bs; 388 ridx = row % bs; cidx = col % bs; 389 if (roworiented) { 390 value = *v++; 391 } 392 else { 393 value = v[k + l*m]; 394 } 395 if (!sorted) low = 0; high = nrow; 396 while (high-low > 5) { 397 t = (low+high)/2; 398 if (rp[t] > bcol) high = t; 399 else low = t; 400 } 401 for ( i=low; i<high; i++ ) { 402 if (rp[i] > bcol) break; 403 if (rp[i] == bcol) { 404 bap = ap + bs2*i + bs*cidx + ridx; 405 if (is == ADD_VALUES) *bap += value; 406 else *bap = value; 407 goto noinsert; 408 } 409 } 410 if (nonew) goto noinsert; 411 if (nrow >= rmax) { 412 /* there is no extra room in row, therefore enlarge */ 413 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 414 Scalar *new_a; 415 416 /* malloc new storage space */ 417 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 418 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 419 new_j = (int *) (new_a + bs2*new_nz); 420 new_i = new_j + new_nz; 421 422 /* copy over old data into new slots */ 423 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 424 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 425 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); 426 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 427 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, 428 len*sizeof(int)); 429 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); 430 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 431 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), 432 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); 433 /* free up old matrix storage */ 434 PetscFree(a->a); 435 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 436 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 437 a->singlemalloc = 1; 438 439 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 440 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 441 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 442 a->maxnz += bs2*CHUNKSIZE; 443 a->reallocs++; 444 a->nz++; 445 } 446 N = nrow++ - 1; 447 /* shift up all the later entries in this row */ 448 for ( ii=N; ii>=i; ii-- ) { 449 rp[ii+1] = rp[ii]; 450 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 451 } 452 if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 453 rp[i] = bcol; 454 ap[bs2*i + bs*cidx + ridx] = value; 455 noinsert:; 456 low = i; 457 } 458 ailen[brow] = nrow; 459 } 460 return 0; 461 } 462 463 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 464 { 465 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 466 *m = a->m; *n = a->n; 467 return 0; 468 } 469 470 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 471 { 472 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 473 *m = 0; *n = a->m; 474 return 0; 475 } 476 477 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 478 { 479 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 480 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 481 Scalar *aa,*v_i,*aa_i; 482 483 bs = a->bs; 484 ai = a->i; 485 aj = a->j; 486 aa = a->a; 487 bs2 = a->bs2; 488 489 if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range"); 490 491 bn = row/bs; /* Block number */ 492 bp = row % bs; /* Block Position */ 493 M = ai[bn+1] - ai[bn]; 494 *nz = bs*M; 495 496 if (v) { 497 *v = 0; 498 if (*nz) { 499 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 500 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 501 v_i = *v + i*bs; 502 aa_i = aa + bs2*(ai[bn] + i); 503 for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 504 } 505 } 506 } 507 508 if (idx) { 509 *idx = 0; 510 if (*nz) { 511 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 512 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 513 idx_i = *idx + i*bs; 514 itmp = bs*aj[ai[bn] + i]; 515 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 516 } 517 } 518 } 519 return 0; 520 } 521 522 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 523 { 524 if (idx) {if (*idx) PetscFree(*idx);} 525 if (v) {if (*v) PetscFree(*v);} 526 return 0; 527 } 528 529 static int MatTranspose_SeqBAIJ(Mat A,Mat *B) 530 { 531 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 532 Mat C; 533 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 534 int *rows,*cols,bs2=a->bs2; 535 Scalar *array=a->a; 536 537 if (B==PETSC_NULL && mbs!=nbs) 538 SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place"); 539 col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 540 PetscMemzero(col,(1+nbs)*sizeof(int)); 541 542 for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 543 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 544 PetscFree(col); 545 rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 546 cols = rows + bs; 547 for ( i=0; i<mbs; i++ ) { 548 cols[0] = i*bs; 549 for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 550 len = ai[i+1] - ai[i]; 551 for ( j=0; j<len; j++ ) { 552 rows[0] = (*aj++)*bs; 553 for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 554 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 555 array += bs2; 556 } 557 } 558 PetscFree(rows); 559 560 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 561 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 562 563 if (B != PETSC_NULL) { 564 *B = C; 565 } else { 566 /* This isn't really an in-place transpose */ 567 PetscFree(a->a); 568 if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 569 if (a->diag) PetscFree(a->diag); 570 if (a->ilen) PetscFree(a->ilen); 571 if (a->imax) PetscFree(a->imax); 572 if (a->solve_work) PetscFree(a->solve_work); 573 PetscFree(a); 574 PetscMemcpy(A,C,sizeof(struct _Mat)); 575 PetscHeaderDestroy(C); 576 } 577 return 0; 578 } 579 580 581 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 582 { 583 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 584 int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 585 int m = a->m,*ip, N, *ailen = a->ilen; 586 int mbs = a->mbs, bs2 = a->bs2; 587 Scalar *aa = a->a, *ap; 588 589 if (mode == MAT_FLUSH_ASSEMBLY) return 0; 590 591 for ( i=1; i<mbs; i++ ) { 592 /* move each row back by the amount of empty slots (fshift) before it*/ 593 fshift += imax[i-1] - ailen[i-1]; 594 if (fshift) { 595 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 596 N = ailen[i]; 597 for ( j=0; j<N; j++ ) { 598 ip[j-fshift] = ip[j]; 599 PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 600 } 601 } 602 ai[i] = ai[i-1] + ailen[i-1]; 603 } 604 if (mbs) { 605 fshift += imax[mbs-1] - ailen[mbs-1]; 606 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 607 } 608 /* reset ilen and imax for each row */ 609 for ( i=0; i<mbs; i++ ) { 610 ailen[i] = imax[i] = ai[i+1] - ai[i]; 611 } 612 a->nz = ai[mbs]; 613 614 /* diagonals may have moved, so kill the diagonal pointers */ 615 if (fshift && a->diag) { 616 PetscFree(a->diag); 617 PLogObjectMemory(A,-(m+1)*sizeof(int)); 618 a->diag = 0; 619 } 620 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Unneed storage space %d used %d, rows %d, block size %d\n", 621 fshift*bs2,a->nz*bs2,m,a->bs); 622 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues %d\n", 623 a->reallocs); 624 return 0; 625 } 626 627 static int MatZeroEntries_SeqBAIJ(Mat A) 628 { 629 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 630 PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar)); 631 return 0; 632 } 633 634 int MatDestroy_SeqBAIJ(PetscObject obj) 635 { 636 Mat A = (Mat) obj; 637 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 638 639 #if defined(PETSC_LOG) 640 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 641 #endif 642 PetscFree(a->a); 643 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 644 if (a->diag) PetscFree(a->diag); 645 if (a->ilen) PetscFree(a->ilen); 646 if (a->imax) PetscFree(a->imax); 647 if (a->solve_work) PetscFree(a->solve_work); 648 if (a->mult_work) PetscFree(a->mult_work); 649 PetscFree(a); 650 PLogObjectDestroy(A); 651 PetscHeaderDestroy(A); 652 return 0; 653 } 654 655 static int MatSetOption_SeqBAIJ(Mat A,MatOption op) 656 { 657 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 658 if (op == MAT_ROW_ORIENTED) a->roworiented = 1; 659 else if (op == MAT_COLUMN_ORIENTED) a->roworiented = 0; 660 else if (op == MAT_COLUMNS_SORTED) a->sorted = 1; 661 else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 662 else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 663 else if (op == MAT_ROWS_SORTED || 664 op == MAT_SYMMETRIC || 665 op == MAT_STRUCTURALLY_SYMMETRIC || 666 op == MAT_YES_NEW_DIAGONALS) 667 PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 668 else if (op == MAT_NO_NEW_DIAGONALS) 669 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:MAT_NO_NEW_DIAGONALS");} 670 else 671 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");} 672 return 0; 673 } 674 675 676 /* -------------------------------------------------------*/ 677 /* Should check that shapes of vectors and matrices match */ 678 /* -------------------------------------------------------*/ 679 #include "pinclude/plapack.h" 680 681 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz) 682 { 683 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 684 register Scalar *x,*z,*v,sum; 685 int mbs=a->mbs,i,*idx,*ii,n; 686 687 VecGetArray_Fast(xx,x); 688 VecGetArray_Fast(zz,z); 689 690 idx = a->j; 691 v = a->a; 692 ii = a->i; 693 694 for ( i=0; i<mbs; i++ ) { 695 n = ii[1] - ii[0]; ii++; 696 sum = 0.0; 697 while (n--) sum += *v++ * x[*idx++]; 698 z[i] = sum; 699 } 700 VecRestoreArray_Fast(xx,x); 701 VecRestoreArray_Fast(zz,z); 702 PLogFlops(2*a->nz - a->m); 703 return 0; 704 } 705 706 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz) 707 { 708 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 709 register Scalar *x,*z,*v,*xb,sum1,sum2; 710 register Scalar x1,x2; 711 int mbs=a->mbs,i,*idx,*ii; 712 int j,n; 713 714 VecGetArray_Fast(xx,x); 715 VecGetArray_Fast(zz,z); 716 717 idx = a->j; 718 v = a->a; 719 ii = a->i; 720 721 for ( i=0; i<mbs; i++ ) { 722 n = ii[1] - ii[0]; ii++; 723 sum1 = 0.0; sum2 = 0.0; 724 for ( j=0; j<n; j++ ) { 725 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 726 sum1 += v[0]*x1 + v[2]*x2; 727 sum2 += v[1]*x1 + v[3]*x2; 728 v += 4; 729 } 730 z[0] = sum1; z[1] = sum2; 731 z += 2; 732 } 733 VecRestoreArray_Fast(xx,x); 734 VecRestoreArray_Fast(zz,z); 735 PLogFlops(4*a->nz - a->m); 736 return 0; 737 } 738 739 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz) 740 { 741 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 742 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 743 int mbs=a->mbs,i,*idx,*ii,j,n; 744 745 VecGetArray_Fast(xx,x); 746 VecGetArray_Fast(zz,z); 747 748 idx = a->j; 749 v = a->a; 750 ii = a->i; 751 752 for ( i=0; i<mbs; i++ ) { 753 n = ii[1] - ii[0]; ii++; 754 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 755 for ( j=0; j<n; j++ ) { 756 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 757 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 758 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 759 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 760 v += 9; 761 } 762 z[0] = sum1; z[1] = sum2; z[2] = sum3; 763 z += 3; 764 } 765 VecRestoreArray_Fast(xx,x); 766 VecRestoreArray_Fast(zz,z); 767 PLogFlops(18*a->nz - a->m); 768 return 0; 769 } 770 771 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz) 772 { 773 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 774 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4; 775 register Scalar x1,x2,x3,x4; 776 int mbs=a->mbs,i,*idx,*ii; 777 int j,n; 778 779 VecGetArray_Fast(xx,x); 780 VecGetArray_Fast(zz,z); 781 782 idx = a->j; 783 v = a->a; 784 ii = a->i; 785 786 for ( i=0; i<mbs; i++ ) { 787 n = ii[1] - ii[0]; ii++; 788 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 789 for ( j=0; j<n; j++ ) { 790 xb = x + 4*(*idx++); 791 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 792 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 793 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 794 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 795 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 796 v += 16; 797 } 798 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 799 z += 4; 800 } 801 VecRestoreArray_Fast(xx,x); 802 VecRestoreArray_Fast(zz,z); 803 PLogFlops(32*a->nz - a->m); 804 return 0; 805 } 806 807 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz) 808 { 809 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 810 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 811 register Scalar x1,x2,x3,x4,x5; 812 int mbs=a->mbs,i,*idx,*ii,j,n; 813 814 VecGetArray_Fast(xx,x); 815 VecGetArray_Fast(zz,z); 816 817 idx = a->j; 818 v = a->a; 819 ii = a->i; 820 821 for ( i=0; i<mbs; i++ ) { 822 n = ii[1] - ii[0]; ii++; 823 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 824 for ( j=0; j<n; j++ ) { 825 xb = x + 5*(*idx++); 826 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 827 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 828 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 829 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 830 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 831 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 832 v += 25; 833 } 834 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 835 z += 5; 836 } 837 VecRestoreArray_Fast(xx,x); 838 VecRestoreArray_Fast(zz,z); 839 PLogFlops(50*a->nz - a->m); 840 return 0; 841 } 842 843 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz) 844 { 845 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 846 register Scalar *x,*z,*v,*xb; 847 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2; 848 int _One = 1,ncols,k; 849 Scalar _DOne = 1.0,*work,*workt,_DZero = 0.0; 850 851 VecGetArray_Fast(xx,x); 852 VecGetArray_Fast(zz,z); 853 854 idx = a->j; 855 v = a->a; 856 ii = a->i; 857 858 859 if (!a->mult_work) { 860 k = PetscMax(a->m,a->n); 861 a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work); 862 } 863 work = a->mult_work; 864 for ( i=0; i<mbs; i++ ) { 865 n = ii[1] - ii[0]; ii++; 866 ncols = n*bs; 867 workt = work; 868 for ( j=0; j<n; j++ ) { 869 xb = x + bs*(*idx++); 870 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 871 workt += bs; 872 } 873 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); 874 v += n*bs2; 875 z += bs; 876 } 877 VecRestoreArray_Fast(xx,x); 878 VecRestoreArray_Fast(zz,z); 879 PLogFlops(2*a->nz*bs2 - a->m); 880 return 0; 881 } 882 883 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 884 { 885 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 886 register Scalar *x,*y,*z,*v,sum; 887 int mbs=a->mbs,i,*idx,*ii,n; 888 889 VecGetArray_Fast(xx,x); 890 VecGetArray_Fast(yy,y); 891 VecGetArray_Fast(zz,z); 892 893 idx = a->j; 894 v = a->a; 895 ii = a->i; 896 897 for ( i=0; i<mbs; i++ ) { 898 n = ii[1] - ii[0]; ii++; 899 sum = y[i]; 900 while (n--) sum += *v++ * x[*idx++]; 901 z[i] = sum; 902 } 903 VecRestoreArray_Fast(xx,x); 904 VecRestoreArray_Fast(yy,y); 905 VecRestoreArray_Fast(zz,z); 906 PLogFlops(2*a->nz); 907 return 0; 908 } 909 910 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 911 { 912 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 913 register Scalar *x,*y,*z,*v,*xb,sum1,sum2; 914 register Scalar x1,x2; 915 int mbs=a->mbs,i,*idx,*ii; 916 int j,n; 917 918 VecGetArray_Fast(xx,x); 919 VecGetArray_Fast(yy,y); 920 VecGetArray_Fast(zz,z); 921 922 idx = a->j; 923 v = a->a; 924 ii = a->i; 925 926 for ( i=0; i<mbs; i++ ) { 927 n = ii[1] - ii[0]; ii++; 928 sum1 = y[0]; sum2 = y[1]; 929 for ( j=0; j<n; j++ ) { 930 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 931 sum1 += v[0]*x1 + v[2]*x2; 932 sum2 += v[1]*x1 + v[3]*x2; 933 v += 4; 934 } 935 z[0] = sum1; z[1] = sum2; 936 z += 2; y += 2; 937 } 938 VecRestoreArray_Fast(xx,x); 939 VecRestoreArray_Fast(yy,y); 940 VecRestoreArray_Fast(zz,z); 941 PLogFlops(4*a->nz); 942 return 0; 943 } 944 945 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 946 { 947 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 948 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 949 int mbs=a->mbs,i,*idx,*ii,j,n; 950 951 VecGetArray_Fast(xx,x); 952 VecGetArray_Fast(yy,y); 953 VecGetArray_Fast(zz,z); 954 955 idx = a->j; 956 v = a->a; 957 ii = a->i; 958 959 for ( i=0; i<mbs; i++ ) { 960 n = ii[1] - ii[0]; ii++; 961 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; 962 for ( j=0; j<n; j++ ) { 963 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 964 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 965 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 966 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 967 v += 9; 968 } 969 z[0] = sum1; z[1] = sum2; z[2] = sum3; 970 z += 3; y += 3; 971 } 972 VecRestoreArray_Fast(xx,x); 973 VecRestoreArray_Fast(yy,y); 974 VecRestoreArray_Fast(zz,z); 975 PLogFlops(18*a->nz); 976 return 0; 977 } 978 979 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 980 { 981 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 982 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4; 983 register Scalar x1,x2,x3,x4; 984 int mbs=a->mbs,i,*idx,*ii; 985 int j,n; 986 987 VecGetArray_Fast(xx,x); 988 VecGetArray_Fast(yy,y); 989 VecGetArray_Fast(zz,z); 990 991 idx = a->j; 992 v = a->a; 993 ii = a->i; 994 995 for ( i=0; i<mbs; i++ ) { 996 n = ii[1] - ii[0]; ii++; 997 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; 998 for ( j=0; j<n; j++ ) { 999 xb = x + 4*(*idx++); 1000 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1001 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 1002 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 1003 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 1004 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 1005 v += 16; 1006 } 1007 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 1008 z += 4; y += 4; 1009 } 1010 VecRestoreArray_Fast(xx,x); 1011 VecRestoreArray_Fast(yy,y); 1012 VecRestoreArray_Fast(zz,z); 1013 PLogFlops(32*a->nz); 1014 return 0; 1015 } 1016 1017 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 1018 { 1019 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1020 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 1021 register Scalar x1,x2,x3,x4,x5; 1022 int mbs=a->mbs,i,*idx,*ii,j,n; 1023 1024 VecGetArray_Fast(xx,x); 1025 VecGetArray_Fast(yy,y); 1026 VecGetArray_Fast(zz,z); 1027 1028 idx = a->j; 1029 v = a->a; 1030 ii = a->i; 1031 1032 for ( i=0; i<mbs; i++ ) { 1033 n = ii[1] - ii[0]; ii++; 1034 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; 1035 for ( j=0; j<n; j++ ) { 1036 xb = x + 5*(*idx++); 1037 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 1038 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1039 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1040 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1041 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1042 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 1043 v += 25; 1044 } 1045 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 1046 z += 5; y += 5; 1047 } 1048 VecRestoreArray_Fast(xx,x); 1049 VecRestoreArray_Fast(yy,y); 1050 VecRestoreArray_Fast(zz,z); 1051 PLogFlops(50*a->nz); 1052 return 0; 1053 } 1054 1055 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 1056 { 1057 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1058 register Scalar *x,*z,*v,*xb; 1059 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr; 1060 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1061 1062 if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1063 1064 VecGetArray_Fast(xx,x); 1065 VecGetArray_Fast(zz,z); 1066 1067 idx = a->j; 1068 v = a->a; 1069 ii = a->i; 1070 1071 1072 if (!a->mult_work) { 1073 k = PetscMax(a->m,a->n); 1074 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work); 1075 } 1076 work = a->mult_work; 1077 for ( i=0; i<mbs; i++ ) { 1078 n = ii[1] - ii[0]; ii++; 1079 ncols = n*bs; 1080 workt = work; 1081 for ( j=0; j<n; j++ ) { 1082 xb = x + bs*(*idx++); 1083 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1084 workt += bs; 1085 } 1086 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); 1087 v += n*bs2; 1088 z += bs; 1089 } 1090 VecRestoreArray_Fast(xx,x); 1091 VecRestoreArray_Fast(zz,z); 1092 PLogFlops(2*a->nz*bs2 ); 1093 return 0; 1094 } 1095 1096 static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz) 1097 { 1098 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1099 Scalar *xg,*zg,*zb; 1100 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1101 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1102 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1103 1104 1105 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 1106 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 1107 PetscMemzero(z,N*sizeof(Scalar)); 1108 1109 idx = a->j; 1110 v = a->a; 1111 ii = a->i; 1112 1113 switch (bs) { 1114 case 1: 1115 for ( i=0; i<mbs; i++ ) { 1116 n = ii[1] - ii[0]; ii++; 1117 xb = x + i; x1 = xb[0]; 1118 ib = idx + ai[i]; 1119 for ( j=0; j<n; j++ ) { 1120 rval = ib[j]; 1121 z[rval] += *v++ * x1; 1122 } 1123 } 1124 break; 1125 case 2: 1126 for ( i=0; i<mbs; i++ ) { 1127 n = ii[1] - ii[0]; ii++; 1128 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1129 ib = idx + ai[i]; 1130 for ( j=0; j<n; j++ ) { 1131 rval = ib[j]*2; 1132 z[rval++] += v[0]*x1 + v[1]*x2; 1133 z[rval++] += v[2]*x1 + v[3]*x2; 1134 v += 4; 1135 } 1136 } 1137 break; 1138 case 3: 1139 for ( i=0; i<mbs; i++ ) { 1140 n = ii[1] - ii[0]; ii++; 1141 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1142 ib = idx + ai[i]; 1143 for ( j=0; j<n; j++ ) { 1144 rval = ib[j]*3; 1145 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1146 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1147 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1148 v += 9; 1149 } 1150 } 1151 break; 1152 case 4: 1153 for ( i=0; i<mbs; i++ ) { 1154 n = ii[1] - ii[0]; ii++; 1155 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1156 ib = idx + ai[i]; 1157 for ( j=0; j<n; j++ ) { 1158 rval = ib[j]*4; 1159 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1160 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1161 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1162 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1163 v += 16; 1164 } 1165 } 1166 break; 1167 case 5: 1168 for ( i=0; i<mbs; i++ ) { 1169 n = ii[1] - ii[0]; ii++; 1170 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1171 x4 = xb[3]; x5 = xb[4]; 1172 ib = idx + ai[i]; 1173 for ( j=0; j<n; j++ ) { 1174 rval = ib[j]*5; 1175 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1176 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1177 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1178 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1179 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1180 v += 25; 1181 } 1182 } 1183 break; 1184 /* block sizes larger then 5 by 5 are handled by BLAS */ 1185 default: { 1186 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1187 if (!a->mult_work) { 1188 k = PetscMax(a->m,a->n); 1189 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1190 CHKPTRQ(a->mult_work); 1191 } 1192 work = a->mult_work; 1193 for ( i=0; i<mbs; i++ ) { 1194 n = ii[1] - ii[0]; ii++; 1195 ncols = n*bs; 1196 PetscMemzero(work,ncols*sizeof(Scalar)); 1197 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1198 v += n*bs2; 1199 x += bs; 1200 workt = work; 1201 for ( j=0; j<n; j++ ) { 1202 zb = z + bs*(*idx++); 1203 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1204 workt += bs; 1205 } 1206 } 1207 } 1208 } 1209 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1210 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1211 PLogFlops(2*a->nz*a->bs2 - a->n); 1212 return 0; 1213 } 1214 static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1215 { 1216 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1217 Scalar *xg,*zg,*zb; 1218 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1219 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1220 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1221 1222 1223 1224 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 1225 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 1226 1227 if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1228 else PetscMemzero(z,N*sizeof(Scalar)); 1229 1230 1231 idx = a->j; 1232 v = a->a; 1233 ii = a->i; 1234 1235 switch (bs) { 1236 case 1: 1237 for ( i=0; i<mbs; i++ ) { 1238 n = ii[1] - ii[0]; ii++; 1239 xb = x + i; x1 = xb[0]; 1240 ib = idx + ai[i]; 1241 for ( j=0; j<n; j++ ) { 1242 rval = ib[j]; 1243 z[rval] += *v++ * x1; 1244 } 1245 } 1246 break; 1247 case 2: 1248 for ( i=0; i<mbs; i++ ) { 1249 n = ii[1] - ii[0]; ii++; 1250 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1251 ib = idx + ai[i]; 1252 for ( j=0; j<n; j++ ) { 1253 rval = ib[j]*2; 1254 z[rval++] += v[0]*x1 + v[1]*x2; 1255 z[rval++] += v[2]*x1 + v[3]*x2; 1256 v += 4; 1257 } 1258 } 1259 break; 1260 case 3: 1261 for ( i=0; i<mbs; i++ ) { 1262 n = ii[1] - ii[0]; ii++; 1263 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1264 ib = idx + ai[i]; 1265 for ( j=0; j<n; j++ ) { 1266 rval = ib[j]*3; 1267 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1268 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1269 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1270 v += 9; 1271 } 1272 } 1273 break; 1274 case 4: 1275 for ( i=0; i<mbs; i++ ) { 1276 n = ii[1] - ii[0]; ii++; 1277 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1278 ib = idx + ai[i]; 1279 for ( j=0; j<n; j++ ) { 1280 rval = ib[j]*4; 1281 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1282 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1283 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1284 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1285 v += 16; 1286 } 1287 } 1288 break; 1289 case 5: 1290 for ( i=0; i<mbs; i++ ) { 1291 n = ii[1] - ii[0]; ii++; 1292 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1293 x4 = xb[3]; x5 = xb[4]; 1294 ib = idx + ai[i]; 1295 for ( j=0; j<n; j++ ) { 1296 rval = ib[j]*5; 1297 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1298 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1299 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1300 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1301 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1302 v += 25; 1303 } 1304 } 1305 break; 1306 /* block sizes larger then 5 by 5 are handled by BLAS */ 1307 default: { 1308 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1309 if (!a->mult_work) { 1310 k = PetscMax(a->m,a->n); 1311 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1312 CHKPTRQ(a->mult_work); 1313 } 1314 work = a->mult_work; 1315 for ( i=0; i<mbs; i++ ) { 1316 n = ii[1] - ii[0]; ii++; 1317 ncols = n*bs; 1318 PetscMemzero(work,ncols*sizeof(Scalar)); 1319 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1320 v += n*bs2; 1321 x += bs; 1322 workt = work; 1323 for ( j=0; j<n; j++ ) { 1324 zb = z + bs*(*idx++); 1325 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1326 workt += bs; 1327 } 1328 } 1329 } 1330 } 1331 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1332 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1333 PLogFlops(2*a->nz*a->bs2); 1334 return 0; 1335 } 1336 1337 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem) 1338 { 1339 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1340 if (nz) *nz = a->bs2*a->nz; 1341 if (nza) *nza = a->maxnz; 1342 if (mem) *mem = (int)A->mem; 1343 return 0; 1344 } 1345 1346 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 1347 { 1348 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 1349 1350 if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type"); 1351 1352 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1353 if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 1354 (a->nz != b->nz)) { 1355 *flg = PETSC_FALSE; return 0; 1356 } 1357 1358 /* if the a->i are the same */ 1359 if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 1360 *flg = PETSC_FALSE; return 0; 1361 } 1362 1363 /* if a->j are the same */ 1364 if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 1365 *flg = PETSC_FALSE; return 0; 1366 } 1367 1368 /* if a->a are the same */ 1369 if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 1370 *flg = PETSC_FALSE; return 0; 1371 } 1372 *flg = PETSC_TRUE; 1373 return 0; 1374 1375 } 1376 1377 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 1378 { 1379 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1380 int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 1381 Scalar *x, zero = 0.0,*aa,*aa_j; 1382 1383 bs = a->bs; 1384 aa = a->a; 1385 ai = a->i; 1386 aj = a->j; 1387 ambs = a->mbs; 1388 bs2 = a->bs2; 1389 1390 VecSet(&zero,v); 1391 VecGetArray(v,&x); VecGetLocalSize(v,&n); 1392 if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector"); 1393 for ( i=0; i<ambs; i++ ) { 1394 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1395 if (aj[j] == i) { 1396 row = i*bs; 1397 aa_j = aa+j*bs2; 1398 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 1399 break; 1400 } 1401 } 1402 } 1403 return 0; 1404 } 1405 1406 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 1407 { 1408 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1409 Scalar *l,*r,x,*v,*aa,*li,*ri; 1410 int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 1411 1412 ai = a->i; 1413 aj = a->j; 1414 aa = a->a; 1415 m = a->m; 1416 n = a->n; 1417 bs = a->bs; 1418 mbs = a->mbs; 1419 bs2 = a->bs2; 1420 if (ll) { 1421 VecGetArray(ll,&l); VecGetSize(ll,&lm); 1422 if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length"); 1423 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1424 M = ai[i+1] - ai[i]; 1425 li = l + i*bs; 1426 v = aa + bs2*ai[i]; 1427 for ( j=0; j<M; j++ ) { /* for each block */ 1428 for ( k=0; k<bs2; k++ ) { 1429 (*v++) *= li[k%bs]; 1430 } 1431 } 1432 } 1433 } 1434 1435 if (rr) { 1436 VecGetArray(rr,&r); VecGetSize(rr,&rn); 1437 if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length"); 1438 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1439 M = ai[i+1] - ai[i]; 1440 v = aa + bs2*ai[i]; 1441 for ( j=0; j<M; j++ ) { /* for each block */ 1442 ri = r + bs*aj[ai[i]+j]; 1443 for ( k=0; k<bs; k++ ) { 1444 x = ri[k]; 1445 for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 1446 } 1447 } 1448 } 1449 } 1450 return 0; 1451 } 1452 1453 1454 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 1455 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 1456 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1457 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*); 1458 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**); 1459 1460 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 1461 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 1462 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 1463 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 1464 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 1465 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 1466 1467 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 1468 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 1469 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 1470 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 1471 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 1472 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 1473 1474 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 1475 { 1476 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1477 Scalar *v = a->a; 1478 double sum = 0.0; 1479 int i,nz=a->nz,bs2=a->bs2; 1480 1481 if (type == NORM_FROBENIUS) { 1482 for (i=0; i< bs2*nz; i++ ) { 1483 #if defined(PETSC_COMPLEX) 1484 sum += real(conj(*v)*(*v)); v++; 1485 #else 1486 sum += (*v)*(*v); v++; 1487 #endif 1488 } 1489 *norm = sqrt(sum); 1490 } 1491 else { 1492 SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet"); 1493 } 1494 return 0; 1495 } 1496 1497 /* 1498 note: This can only work for identity for row and col. It would 1499 be good to check this and otherwise generate an error. 1500 */ 1501 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 1502 { 1503 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1504 Mat outA; 1505 int ierr; 1506 1507 if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported"); 1508 1509 outA = inA; 1510 inA->factor = FACTOR_LU; 1511 a->row = row; 1512 a->col = col; 1513 1514 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1515 1516 if (!a->diag) { 1517 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 1518 } 1519 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 1520 return 0; 1521 } 1522 1523 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 1524 { 1525 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1526 int one = 1, totalnz = a->bs2*a->nz; 1527 BLscal_( &totalnz, alpha, a->a, &one ); 1528 PLogFlops(totalnz); 1529 return 0; 1530 } 1531 1532 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 1533 { 1534 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1535 int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 1536 int *ai = a->i, *ailen = a->ilen; 1537 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 1538 Scalar *ap, *aa = a->a, zero = 0.0; 1539 1540 for ( k=0; k<m; k++ ) { /* loop over rows */ 1541 row = im[k]; brow = row/bs; 1542 if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row"); 1543 if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large"); 1544 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1545 nrow = ailen[brow]; 1546 for ( l=0; l<n; l++ ) { /* loop over columns */ 1547 if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column"); 1548 if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large"); 1549 col = in[l] ; 1550 bcol = col/bs; 1551 cidx = col%bs; 1552 ridx = row%bs; 1553 high = nrow; 1554 low = 0; /* assume unsorted */ 1555 while (high-low > 5) { 1556 t = (low+high)/2; 1557 if (rp[t] > bcol) high = t; 1558 else low = t; 1559 } 1560 for ( i=low; i<high; i++ ) { 1561 if (rp[i] > bcol) break; 1562 if (rp[i] == bcol) { 1563 *v++ = ap[bs2*i+bs*cidx+ridx]; 1564 goto finished; 1565 } 1566 } 1567 *v++ = zero; 1568 finished:; 1569 } 1570 } 1571 return 0; 1572 } 1573 1574 static int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs) 1575 { 1576 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data; 1577 *bs = baij->bs; 1578 return 0; 1579 } 1580 1581 /* idx should be of length atlease bs */ 1582 static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg) 1583 { 1584 int i,row; 1585 row = idx[0]; 1586 if (row%bs!=0) { *flg = PETSC_FALSE; return 0; } 1587 1588 for ( i=1; i<bs; i++ ) { 1589 if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; } 1590 } 1591 *flg = PETSC_TRUE; 1592 return 0; 1593 } 1594 1595 static int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag) 1596 { 1597 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1598 IS is_local; 1599 int ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2; 1600 PetscTruth flg; 1601 Scalar zero = 0.0,*aa; 1602 1603 /* Make a copy of the IS and sort it */ 1604 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 1605 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1606 ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr); 1607 ierr = ISSort(is_local); CHKERRQ(ierr); 1608 ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr); 1609 1610 i = 0; 1611 while (i < is_n) { 1612 if (rows[i]<0 || rows[i]>m) SETERRQ(1,"MatZeroRows_SeqBAIJ:row out of range"); 1613 flg = PETSC_FALSE; 1614 if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); } 1615 if (flg) { /* There exists a block of rows to be Zerowed */ 1616 baij->ilen[rows[i]/bs] = 0; 1617 i += bs; 1618 } else { /* Zero out only the requested row */ 1619 count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs; 1620 aa = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs); 1621 for ( j=0; j<count; j++ ) { 1622 aa[0] = zero; 1623 aa+=bs; 1624 } 1625 i++; 1626 } 1627 } 1628 if (diag) { 1629 for ( j=0; j<is_n; j++ ) { 1630 ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1631 } 1632 } 1633 ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr); 1634 ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr); 1635 ierr = ISDestroy(is_local); CHKERRQ(ierr); 1636 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1637 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1638 1639 return 0; 1640 } 1641 int MatPrintHelp_SeqBAIJ(Mat A) 1642 { 1643 static int called = 0; 1644 MPI_Comm comm = A->comm; 1645 1646 if (called) return 0; else called = 1; 1647 PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n"); 1648 PetscPrintf(comm," -mat_block_size <block_size>\n"); 1649 return 0; 1650 } 1651 1652 /* -------------------------------------------------------------------*/ 1653 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 1654 MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 1655 MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N, 1656 MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ, 1657 MatSolve_SeqBAIJ_N,0, 1658 0,0, 1659 MatLUFactor_SeqBAIJ,0, 1660 0, 1661 MatTranspose_SeqBAIJ, 1662 MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 1663 MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 1664 0,MatAssemblyEnd_SeqBAIJ, 1665 0, 1666 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ, 1667 MatGetReordering_SeqBAIJ, 1668 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 1669 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 1670 MatILUFactorSymbolic_SeqBAIJ,0, 1671 0,0,/* MatConvert_SeqBAIJ */ 0, 1672 MatConvertSameType_SeqBAIJ,0,0, 1673 MatILUFactor_SeqBAIJ,0,0, 1674 MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ, 1675 MatGetValues_SeqBAIJ,0, 1676 MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 1677 0,0,0,MatGetBlockSize_SeqBAIJ}; 1678 1679 /*@C 1680 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1681 compressed row) format. For good matrix assembly performance the 1682 user should preallocate the matrix storage by setting the parameter nz 1683 (or the array nzz). By setting these parameters accurately, performance 1684 during matrix assembly can be increased by more than a factor of 50. 1685 1686 Input Parameters: 1687 . comm - MPI communicator, set to MPI_COMM_SELF 1688 . bs - size of block 1689 . m - number of rows 1690 . n - number of columns 1691 . nz - number of block nonzeros per block row (same for all rows) 1692 . nzz - array containing the number of block nonzeros in the various block rows 1693 (possibly different for each block row) or PETSC_NULL 1694 1695 Output Parameter: 1696 . A - the matrix 1697 1698 Notes: 1699 The block AIJ format is fully compatible with standard Fortran 77 1700 storage. That is, the stored row and column indices can begin at 1701 either one (as in Fortran) or zero. See the users' manual for details. 1702 1703 Specify the preallocated storage with either nz or nnz (not both). 1704 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1705 allocation. For additional details, see the users manual chapter on 1706 matrices and the file $(PETSC_DIR)/Performance. 1707 1708 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1709 @*/ 1710 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 1711 { 1712 Mat B; 1713 Mat_SeqBAIJ *b; 1714 int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs; 1715 1716 if (mbs*bs!=m || nbs*bs!=n) 1717 SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize"); 1718 1719 *A = 0; 1720 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 1721 PLogObjectCreate(B); 1722 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 1723 PetscMemzero(b,sizeof(Mat_SeqBAIJ)); 1724 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 1725 ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr); 1726 if (!flg) { 1727 switch (bs) { 1728 case 1: 1729 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1730 B->ops.solve = MatSolve_SeqBAIJ_1; 1731 B->ops.mult = MatMult_SeqBAIJ_1; 1732 B->ops.multadd = MatMultAdd_SeqBAIJ_1; 1733 break; 1734 case 2: 1735 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1736 B->ops.solve = MatSolve_SeqBAIJ_2; 1737 B->ops.mult = MatMult_SeqBAIJ_2; 1738 B->ops.multadd = MatMultAdd_SeqBAIJ_2; 1739 break; 1740 case 3: 1741 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1742 B->ops.solve = MatSolve_SeqBAIJ_3; 1743 B->ops.mult = MatMult_SeqBAIJ_3; 1744 B->ops.multadd = MatMultAdd_SeqBAIJ_3; 1745 break; 1746 case 4: 1747 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1748 B->ops.solve = MatSolve_SeqBAIJ_4; 1749 B->ops.mult = MatMult_SeqBAIJ_4; 1750 B->ops.multadd = MatMultAdd_SeqBAIJ_4; 1751 break; 1752 case 5: 1753 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1754 B->ops.solve = MatSolve_SeqBAIJ_5; 1755 B->ops.mult = MatMult_SeqBAIJ_5; 1756 B->ops.multadd = MatMultAdd_SeqBAIJ_5; 1757 break; 1758 } 1759 } 1760 B->destroy = MatDestroy_SeqBAIJ; 1761 B->view = MatView_SeqBAIJ; 1762 B->factor = 0; 1763 B->lupivotthreshold = 1.0; 1764 b->row = 0; 1765 b->col = 0; 1766 b->reallocs = 0; 1767 1768 b->m = m; B->m = m; B->M = m; 1769 b->n = n; B->n = n; B->N = n; 1770 b->mbs = mbs; 1771 b->nbs = nbs; 1772 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 1773 if (nnz == PETSC_NULL) { 1774 if (nz == PETSC_DEFAULT) nz = 5; 1775 else if (nz <= 0) nz = 1; 1776 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 1777 nz = nz*mbs; 1778 } 1779 else { 1780 nz = 0; 1781 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 1782 } 1783 1784 /* allocate the matrix space */ 1785 len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 1786 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 1787 PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 1788 b->j = (int *) (b->a + nz*bs2); 1789 PetscMemzero(b->j,nz*sizeof(int)); 1790 b->i = b->j + nz; 1791 b->singlemalloc = 1; 1792 1793 b->i[0] = 0; 1794 for (i=1; i<mbs+1; i++) { 1795 b->i[i] = b->i[i-1] + b->imax[i-1]; 1796 } 1797 1798 /* b->ilen will count nonzeros in each block row so far. */ 1799 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 1800 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1801 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 1802 1803 b->bs = bs; 1804 b->bs2 = bs2; 1805 b->mbs = mbs; 1806 b->nz = 0; 1807 b->maxnz = nz*bs2; 1808 b->sorted = 0; 1809 b->roworiented = 1; 1810 b->nonew = 0; 1811 b->diag = 0; 1812 b->solve_work = 0; 1813 b->mult_work = 0; 1814 b->spptr = 0; 1815 *A = B; 1816 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 1817 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 1818 return 0; 1819 } 1820 1821 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 1822 { 1823 Mat C; 1824 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 1825 int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1826 1827 if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix"); 1828 1829 *B = 0; 1830 PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 1831 PLogObjectCreate(C); 1832 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 1833 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 1834 C->destroy = MatDestroy_SeqBAIJ; 1835 C->view = MatView_SeqBAIJ; 1836 C->factor = A->factor; 1837 c->row = 0; 1838 c->col = 0; 1839 C->assembled = PETSC_TRUE; 1840 1841 c->m = C->m = a->m; 1842 c->n = C->n = a->n; 1843 C->M = a->m; 1844 C->N = a->n; 1845 1846 c->bs = a->bs; 1847 c->bs2 = a->bs2; 1848 c->mbs = a->mbs; 1849 c->nbs = a->nbs; 1850 1851 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 1852 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 1853 for ( i=0; i<mbs; i++ ) { 1854 c->imax[i] = a->imax[i]; 1855 c->ilen[i] = a->ilen[i]; 1856 } 1857 1858 /* allocate the matrix space */ 1859 c->singlemalloc = 1; 1860 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 1861 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 1862 c->j = (int *) (c->a + nz*bs2); 1863 c->i = c->j + nz; 1864 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 1865 if (mbs > 0) { 1866 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 1867 if (cpvalues == COPY_VALUES) { 1868 PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 1869 } 1870 } 1871 1872 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1873 c->sorted = a->sorted; 1874 c->roworiented = a->roworiented; 1875 c->nonew = a->nonew; 1876 1877 if (a->diag) { 1878 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 1879 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 1880 for ( i=0; i<mbs; i++ ) { 1881 c->diag[i] = a->diag[i]; 1882 } 1883 } 1884 else c->diag = 0; 1885 c->nz = a->nz; 1886 c->maxnz = a->maxnz; 1887 c->solve_work = 0; 1888 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1889 c->mult_work = 0; 1890 *B = C; 1891 return 0; 1892 } 1893 1894 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 1895 { 1896 Mat_SeqBAIJ *a; 1897 Mat B; 1898 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 1899 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1900 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1901 int *masked, nmask,tmp,bs2,ishift; 1902 Scalar *aa; 1903 MPI_Comm comm = ((PetscObject) viewer)->comm; 1904 1905 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 1906 bs2 = bs*bs; 1907 1908 MPI_Comm_size(comm,&size); 1909 if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor"); 1910 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1911 ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 1912 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object"); 1913 M = header[1]; N = header[2]; nz = header[3]; 1914 1915 if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 1916 1917 /* 1918 This code adds extra rows to make sure the number of rows is 1919 divisible by the blocksize 1920 */ 1921 mbs = M/bs; 1922 extra_rows = bs - M + bs*(mbs); 1923 if (extra_rows == bs) extra_rows = 0; 1924 else mbs++; 1925 if (extra_rows) { 1926 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1927 } 1928 1929 /* read in row lengths */ 1930 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 1931 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 1932 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 1933 1934 /* read in column indices */ 1935 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 1936 ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 1937 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 1938 1939 /* loop over row lengths determining block row lengths */ 1940 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 1941 PetscMemzero(browlengths,mbs*sizeof(int)); 1942 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 1943 PetscMemzero(mask,mbs*sizeof(int)); 1944 masked = mask + mbs; 1945 rowcount = 0; nzcount = 0; 1946 for ( i=0; i<mbs; i++ ) { 1947 nmask = 0; 1948 for ( j=0; j<bs; j++ ) { 1949 kmax = rowlengths[rowcount]; 1950 for ( k=0; k<kmax; k++ ) { 1951 tmp = jj[nzcount++]/bs; 1952 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1953 } 1954 rowcount++; 1955 } 1956 browlengths[i] += nmask; 1957 /* zero out the mask elements we set */ 1958 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1959 } 1960 1961 /* create our matrix */ 1962 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 1963 CHKERRQ(ierr); 1964 B = *A; 1965 a = (Mat_SeqBAIJ *) B->data; 1966 1967 /* set matrix "i" values */ 1968 a->i[0] = 0; 1969 for ( i=1; i<= mbs; i++ ) { 1970 a->i[i] = a->i[i-1] + browlengths[i-1]; 1971 a->ilen[i-1] = browlengths[i-1]; 1972 } 1973 a->nz = 0; 1974 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 1975 1976 /* read in nonzero values */ 1977 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 1978 ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 1979 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 1980 1981 /* set "a" and "j" values into matrix */ 1982 nzcount = 0; jcount = 0; 1983 for ( i=0; i<mbs; i++ ) { 1984 nzcountb = nzcount; 1985 nmask = 0; 1986 for ( j=0; j<bs; j++ ) { 1987 kmax = rowlengths[i*bs+j]; 1988 for ( k=0; k<kmax; k++ ) { 1989 tmp = jj[nzcount++]/bs; 1990 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1991 } 1992 rowcount++; 1993 } 1994 /* sort the masked values */ 1995 PetscSortInt(nmask,masked); 1996 1997 /* set "j" values into matrix */ 1998 maskcount = 1; 1999 for ( j=0; j<nmask; j++ ) { 2000 a->j[jcount++] = masked[j]; 2001 mask[masked[j]] = maskcount++; 2002 } 2003 /* set "a" values into matrix */ 2004 ishift = bs2*a->i[i]; 2005 for ( j=0; j<bs; j++ ) { 2006 kmax = rowlengths[i*bs+j]; 2007 for ( k=0; k<kmax; k++ ) { 2008 tmp = jj[nzcountb]/bs ; 2009 block = mask[tmp] - 1; 2010 point = jj[nzcountb] - bs*tmp; 2011 idx = ishift + bs2*block + j + bs*point; 2012 a->a[idx] = aa[nzcountb++]; 2013 } 2014 } 2015 /* zero out the mask elements we set */ 2016 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2017 } 2018 if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix"); 2019 2020 PetscFree(rowlengths); 2021 PetscFree(browlengths); 2022 PetscFree(aa); 2023 PetscFree(jj); 2024 PetscFree(mask); 2025 2026 B->assembled = PETSC_TRUE; 2027 2028 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 2029 if (flg) { 2030 Viewer tviewer; 2031 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2032 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr); 2033 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2034 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2035 } 2036 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 2037 if (flg) { 2038 Viewer tviewer; 2039 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2040 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr); 2041 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2042 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2043 } 2044 ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 2045 if (flg) { 2046 Viewer tviewer; 2047 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2048 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2049 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2050 } 2051 ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 2052 if (flg) { 2053 Viewer tviewer; 2054 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2055 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr); 2056 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2057 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2058 } 2059 ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 2060 if (flg) { 2061 Viewer tviewer; 2062 ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 2063 ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 2064 ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 2065 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2066 } 2067 return 0; 2068 } 2069 2070 2071 2072