1 #ifndef lint 2 static char vcid[] = "$Id: baij.c,v 1.66 1996/08/15 12:48:03 bsmith Exp curfman $"; 3 #endif 4 5 /* 6 Defines the basic matrix operations for the BAIJ (compressed row) 7 matrix storage format. 8 */ 9 #include "src/mat/impls/baij/seq/baij.h" 10 #include "src/vec/vecimpl.h" 11 #include "src/inline/spops.h" 12 #include "petsc.h" 13 14 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 15 16 static int MatGetReordering_SeqBAIJ(Mat A,MatReordering type,IS *rperm,IS *cperm) 17 { 18 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 19 int ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift; 20 21 /* 22 this is tacky: In the future when we have written special factorization 23 and solve routines for the identity permutation we should use a 24 stride index set instead of the general one. 25 */ 26 if (type == ORDER_NATURAL) { 27 idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx); 28 for ( i=0; i<n; i++ ) idx[i] = i; 29 ierr = ISCreateGeneral(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr); 30 ierr = ISCreateGeneral(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr); 31 PetscFree(idx); 32 ISSetPermutation(*rperm); 33 ISSetPermutation(*cperm); 34 ISSetIdentity(*rperm); 35 ISSetIdentity(*cperm); 36 return 0; 37 } 38 39 MatReorderingRegisterAll(); 40 ishift = 0; 41 oshift = -MatReorderingIndexShift[(int)type]; 42 if (MatReorderingRequiresSymmetric[(int)type]) { 43 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,ishift,oshift,&ia,&ja);CHKERRQ(ierr); 44 ierr = MatGetReordering_IJ(n,ia,ja,type,rperm,cperm); CHKERRQ(ierr); 45 PetscFree(ia); PetscFree(ja); 46 } else { 47 if (ishift == oshift) { 48 ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 49 } 50 else if (ishift == -1) { 51 /* temporarily subtract 1 from i and j indices */ 52 int nz = a->i[n] - 1; 53 for ( i=0; i<nz; i++ ) a->j[i]--; 54 for ( i=0; i<n+1; i++ ) a->i[i]--; 55 ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 56 for ( i=0; i<nz; i++ ) a->j[i]++; 57 for ( i=0; i<n+1; i++ ) a->i[i]++; 58 } else { 59 /* temporarily add 1 to i and j indices */ 60 int nz = a->i[n] - 1; 61 for ( i=0; i<nz; i++ ) a->j[i]++; 62 for ( i=0; i<n+1; i++ ) a->i[i]++; 63 ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 64 for ( i=0; i<nz; i++ ) a->j[i]--; 65 for ( i=0; i<n+1; i++ ) a->i[i]--; 66 } 67 } 68 return 0; 69 } 70 71 /* 72 Adds diagonal pointers to sparse matrix structure. 73 */ 74 75 int MatMarkDiag_SeqBAIJ(Mat A) 76 { 77 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 78 int i,j, *diag, m = a->mbs; 79 80 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 81 PLogObjectMemory(A,(m+1)*sizeof(int)); 82 for ( i=0; i<m; i++ ) { 83 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 84 if (a->j[j] == i) { 85 diag[i] = j; 86 break; 87 } 88 } 89 } 90 a->diag = diag; 91 return 0; 92 } 93 94 #include "draw.h" 95 #include "pinclude/pviewer.h" 96 #include "sys.h" 97 98 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 99 { 100 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 101 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 102 Scalar *aa; 103 104 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 105 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 106 col_lens[0] = MAT_COOKIE; 107 col_lens[1] = a->m; 108 col_lens[2] = a->n; 109 col_lens[3] = a->nz*bs2; 110 111 /* store lengths of each row and write (including header) to file */ 112 count = 0; 113 for ( i=0; i<a->mbs; i++ ) { 114 for ( j=0; j<bs; j++ ) { 115 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 116 } 117 } 118 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 119 PetscFree(col_lens); 120 121 /* store column indices (zero start index) */ 122 jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj); 123 count = 0; 124 for ( i=0; i<a->mbs; i++ ) { 125 for ( j=0; j<bs; j++ ) { 126 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 127 for ( l=0; l<bs; l++ ) { 128 jj[count++] = bs*a->j[k] + l; 129 } 130 } 131 } 132 } 133 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr); 134 PetscFree(jj); 135 136 /* store nonzero values */ 137 aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa); 138 count = 0; 139 for ( i=0; i<a->mbs; i++ ) { 140 for ( j=0; j<bs; j++ ) { 141 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 142 for ( l=0; l<bs; l++ ) { 143 aa[count++] = a->a[bs2*k + l*bs + j]; 144 } 145 } 146 } 147 } 148 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 149 PetscFree(aa); 150 return 0; 151 } 152 153 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 154 { 155 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 156 int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 157 FILE *fd; 158 char *outputname; 159 160 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 161 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 162 ierr = ViewerGetFormat(viewer,&format); 163 if (format == ASCII_FORMAT_INFO || format == ASCII_FORMAT_INFO_DETAILED) { 164 fprintf(fd," block size is %d\n",bs); 165 } 166 else if (format == ASCII_FORMAT_MATLAB) { 167 SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported"); 168 } 169 else if (format == ASCII_FORMAT_COMMON) { 170 for ( i=0; i<a->mbs; i++ ) { 171 for ( j=0; j<bs; j++ ) { 172 fprintf(fd,"row %d:",i*bs+j); 173 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 174 for ( l=0; l<bs; l++ ) { 175 #if defined(PETSC_COMPLEX) 176 if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 177 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 178 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 179 else if (real(a->a[bs2*k + l*bs + j]) != 0.0) 180 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 181 #else 182 if (a->a[bs2*k + l*bs + j] != 0.0) 183 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 184 #endif 185 } 186 } 187 fprintf(fd,"\n"); 188 } 189 } 190 } 191 else { 192 for ( i=0; i<a->mbs; i++ ) { 193 for ( j=0; j<bs; j++ ) { 194 fprintf(fd,"row %d:",i*bs+j); 195 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 196 for ( l=0; l<bs; l++ ) { 197 #if defined(PETSC_COMPLEX) 198 if (imag(a->a[bs2*k + l*bs + j]) != 0.0) { 199 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 200 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 201 } 202 else { 203 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 204 } 205 #else 206 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 207 #endif 208 } 209 } 210 fprintf(fd,"\n"); 211 } 212 } 213 } 214 fflush(fd); 215 return 0; 216 } 217 218 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer) 219 { 220 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 221 int row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2; 222 double xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r; 223 Scalar *aa; 224 Draw draw; 225 DrawButton button; 226 PetscTruth isnull; 227 228 ViewerDrawGetDraw(viewer,&draw); 229 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 230 231 xr = a->n; yr = a->m; h = yr/10.0; w = xr/10.0; 232 xr += w; yr += h; xl = -w; yl = -h; 233 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 234 /* loop over matrix elements drawing boxes */ 235 color = DRAW_BLUE; 236 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 237 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 238 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 239 x_l = a->j[j]*bs; x_r = x_l + 1.0; 240 aa = a->a + j*bs2; 241 for ( k=0; k<bs; k++ ) { 242 for ( l=0; l<bs; l++ ) { 243 if (PetscReal(*aa++) >= 0.) continue; 244 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 245 } 246 } 247 } 248 } 249 color = DRAW_CYAN; 250 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 251 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 252 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 253 x_l = a->j[j]*bs; x_r = x_l + 1.0; 254 aa = a->a + j*bs2; 255 for ( k=0; k<bs; k++ ) { 256 for ( l=0; l<bs; l++ ) { 257 if (PetscReal(*aa++) != 0.) continue; 258 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 259 } 260 } 261 } 262 } 263 264 color = DRAW_RED; 265 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 266 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 267 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 268 x_l = a->j[j]*bs; x_r = x_l + 1.0; 269 aa = a->a + j*bs2; 270 for ( k=0; k<bs; k++ ) { 271 for ( l=0; l<bs; l++ ) { 272 if (PetscReal(*aa++) <= 0.) continue; 273 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 274 } 275 } 276 } 277 } 278 279 DrawFlush(draw); 280 DrawGetPause(draw,&pause); 281 if (pause >= 0) { PetscSleep(pause); return 0;} 282 283 /* allow the matrix to zoom or shrink */ 284 ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 285 while (button != BUTTON_RIGHT) { 286 DrawClear(draw); 287 if (button == BUTTON_LEFT) scale = .5; 288 else if (button == BUTTON_CENTER) scale = 2.; 289 xl = scale*(xl + w - xc) + xc - w*scale; 290 xr = scale*(xr - w - xc) + xc + w*scale; 291 yl = scale*(yl + h - yc) + yc - h*scale; 292 yr = scale*(yr - h - yc) + yc + h*scale; 293 w *= scale; h *= scale; 294 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 295 color = DRAW_BLUE; 296 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 297 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 298 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 299 x_l = a->j[j]*bs; x_r = x_l + 1.0; 300 aa = a->a + j*bs2; 301 for ( k=0; k<bs; k++ ) { 302 for ( l=0; l<bs; l++ ) { 303 if (PetscReal(*aa++) >= 0.) continue; 304 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 305 } 306 } 307 } 308 } 309 color = DRAW_CYAN; 310 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 311 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 312 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 313 x_l = a->j[j]*bs; x_r = x_l + 1.0; 314 aa = a->a + j*bs2; 315 for ( k=0; k<bs; k++ ) { 316 for ( l=0; l<bs; l++ ) { 317 if (PetscReal(*aa++) != 0.) continue; 318 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 319 } 320 } 321 } 322 } 323 324 color = DRAW_RED; 325 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 326 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 327 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 328 x_l = a->j[j]*bs; x_r = x_l + 1.0; 329 aa = a->a + j*bs2; 330 for ( k=0; k<bs; k++ ) { 331 for ( l=0; l<bs; l++ ) { 332 if (PetscReal(*aa++) <= 0.) continue; 333 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 334 } 335 } 336 } 337 } 338 ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 339 } 340 return 0; 341 } 342 343 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 344 { 345 Mat A = (Mat) obj; 346 ViewerType vtype; 347 int ierr; 348 349 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 350 if (vtype == MATLAB_VIEWER) { 351 SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported"); 352 } 353 else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 354 return MatView_SeqBAIJ_ASCII(A,viewer); 355 } 356 else if (vtype == BINARY_FILE_VIEWER) { 357 return MatView_SeqBAIJ_Binary(A,viewer); 358 } 359 else if (vtype == DRAW_VIEWER) { 360 return MatView_SeqBAIJ_Draw(A,viewer); 361 } 362 return 0; 363 } 364 365 #define CHUNKSIZE 10 366 367 /* This version has row oriented v */ 368 static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 369 { 370 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 371 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 372 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 373 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 374 int ridx,cidx,bs2=a->bs2; 375 Scalar *ap,value,*aa=a->a,*bap; 376 377 for ( k=0; k<m; k++ ) { /* loop over added rows */ 378 row = im[k]; brow = row/bs; 379 if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row"); 380 if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large"); 381 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 382 rmax = imax[brow]; nrow = ailen[brow]; 383 low = 0; 384 for ( l=0; l<n; l++ ) { /* loop over added columns */ 385 if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column"); 386 if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large"); 387 col = in[l]; bcol = col/bs; 388 ridx = row % bs; cidx = col % bs; 389 if (roworiented) { 390 value = *v++; 391 } 392 else { 393 value = v[k + l*m]; 394 } 395 if (!sorted) low = 0; high = nrow; 396 while (high-low > 5) { 397 t = (low+high)/2; 398 if (rp[t] > bcol) high = t; 399 else low = t; 400 } 401 for ( i=low; i<high; i++ ) { 402 if (rp[i] > bcol) break; 403 if (rp[i] == bcol) { 404 bap = ap + bs2*i + bs*cidx + ridx; 405 if (is == ADD_VALUES) *bap += value; 406 else *bap = value; 407 goto noinsert; 408 } 409 } 410 if (nonew) goto noinsert; 411 if (nrow >= rmax) { 412 /* there is no extra room in row, therefore enlarge */ 413 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 414 Scalar *new_a; 415 416 /* malloc new storage space */ 417 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 418 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 419 new_j = (int *) (new_a + bs2*new_nz); 420 new_i = new_j + new_nz; 421 422 /* copy over old data into new slots */ 423 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 424 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 425 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); 426 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 427 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, 428 len*sizeof(int)); 429 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); 430 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 431 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), 432 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); 433 /* free up old matrix storage */ 434 PetscFree(a->a); 435 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 436 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 437 a->singlemalloc = 1; 438 439 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 440 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 441 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 442 a->maxnz += bs2*CHUNKSIZE; 443 a->reallocs++; 444 a->nz++; 445 } 446 N = nrow++ - 1; 447 /* shift up all the later entries in this row */ 448 for ( ii=N; ii>=i; ii-- ) { 449 rp[ii+1] = rp[ii]; 450 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 451 } 452 if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 453 rp[i] = bcol; 454 ap[bs2*i + bs*cidx + ridx] = value; 455 noinsert:; 456 low = i; 457 } 458 ailen[brow] = nrow; 459 } 460 return 0; 461 } 462 463 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 464 { 465 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 466 *m = a->m; *n = a->n; 467 return 0; 468 } 469 470 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 471 { 472 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 473 *m = 0; *n = a->m; 474 return 0; 475 } 476 477 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 478 { 479 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 480 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 481 Scalar *aa,*v_i,*aa_i; 482 483 bs = a->bs; 484 ai = a->i; 485 aj = a->j; 486 aa = a->a; 487 bs2 = a->bs2; 488 489 if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range"); 490 491 bn = row/bs; /* Block number */ 492 bp = row % bs; /* Block Position */ 493 M = ai[bn+1] - ai[bn]; 494 *nz = bs*M; 495 496 if (v) { 497 *v = 0; 498 if (*nz) { 499 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 500 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 501 v_i = *v + i*bs; 502 aa_i = aa + bs2*(ai[bn] + i); 503 for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 504 } 505 } 506 } 507 508 if (idx) { 509 *idx = 0; 510 if (*nz) { 511 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 512 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 513 idx_i = *idx + i*bs; 514 itmp = bs*aj[ai[bn] + i]; 515 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 516 } 517 } 518 } 519 return 0; 520 } 521 522 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 523 { 524 if (idx) {if (*idx) PetscFree(*idx);} 525 if (v) {if (*v) PetscFree(*v);} 526 return 0; 527 } 528 529 static int MatTranspose_SeqBAIJ(Mat A,Mat *B) 530 { 531 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 532 Mat C; 533 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 534 int *rows,*cols,bs2=a->bs2; 535 Scalar *array=a->a; 536 537 if (B==PETSC_NULL && mbs!=nbs) 538 SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place"); 539 col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 540 PetscMemzero(col,(1+nbs)*sizeof(int)); 541 542 for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 543 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 544 PetscFree(col); 545 rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 546 cols = rows + bs; 547 for ( i=0; i<mbs; i++ ) { 548 cols[0] = i*bs; 549 for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 550 len = ai[i+1] - ai[i]; 551 for ( j=0; j<len; j++ ) { 552 rows[0] = (*aj++)*bs; 553 for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 554 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 555 array += bs2; 556 } 557 } 558 PetscFree(rows); 559 560 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 561 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 562 563 if (B != PETSC_NULL) { 564 *B = C; 565 } else { 566 /* This isn't really an in-place transpose */ 567 PetscFree(a->a); 568 if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 569 if (a->diag) PetscFree(a->diag); 570 if (a->ilen) PetscFree(a->ilen); 571 if (a->imax) PetscFree(a->imax); 572 if (a->solve_work) PetscFree(a->solve_work); 573 PetscFree(a); 574 PetscMemcpy(A,C,sizeof(struct _Mat)); 575 PetscHeaderDestroy(C); 576 } 577 return 0; 578 } 579 580 581 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 582 { 583 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 584 int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 585 int m = a->m,*ip, N, *ailen = a->ilen; 586 int mbs = a->mbs, bs2 = a->bs2; 587 Scalar *aa = a->a, *ap; 588 589 if (mode == MAT_FLUSH_ASSEMBLY) return 0; 590 591 for ( i=1; i<mbs; i++ ) { 592 /* move each row back by the amount of empty slots (fshift) before it*/ 593 fshift += imax[i-1] - ailen[i-1]; 594 if (fshift) { 595 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 596 N = ailen[i]; 597 for ( j=0; j<N; j++ ) { 598 ip[j-fshift] = ip[j]; 599 PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 600 } 601 } 602 ai[i] = ai[i-1] + ailen[i-1]; 603 } 604 if (mbs) { 605 fshift += imax[mbs-1] - ailen[mbs-1]; 606 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 607 } 608 /* reset ilen and imax for each row */ 609 for ( i=0; i<mbs; i++ ) { 610 ailen[i] = imax[i] = ai[i+1] - ai[i]; 611 } 612 a->nz = ai[mbs]; 613 614 /* diagonals may have moved, so kill the diagonal pointers */ 615 if (fshift && a->diag) { 616 PetscFree(a->diag); 617 PLogObjectMemory(A,-(m+1)*sizeof(int)); 618 a->diag = 0; 619 } 620 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Unneed storage space %d used %d, rows %d, block size %d\n", 621 fshift*bs2,a->nz*bs2,m,a->bs); 622 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues %d\n", 623 a->reallocs); 624 A->info.nz_unneeded = (double)fshift*bs2; 625 626 return 0; 627 } 628 629 static int MatZeroEntries_SeqBAIJ(Mat A) 630 { 631 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 632 PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar)); 633 return 0; 634 } 635 636 int MatDestroy_SeqBAIJ(PetscObject obj) 637 { 638 Mat A = (Mat) obj; 639 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 640 641 #if defined(PETSC_LOG) 642 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 643 #endif 644 PetscFree(a->a); 645 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 646 if (a->diag) PetscFree(a->diag); 647 if (a->ilen) PetscFree(a->ilen); 648 if (a->imax) PetscFree(a->imax); 649 if (a->solve_work) PetscFree(a->solve_work); 650 if (a->mult_work) PetscFree(a->mult_work); 651 PetscFree(a); 652 PLogObjectDestroy(A); 653 PetscHeaderDestroy(A); 654 return 0; 655 } 656 657 static int MatSetOption_SeqBAIJ(Mat A,MatOption op) 658 { 659 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 660 if (op == MAT_ROW_ORIENTED) a->roworiented = 1; 661 else if (op == MAT_COLUMN_ORIENTED) a->roworiented = 0; 662 else if (op == MAT_COLUMNS_SORTED) a->sorted = 1; 663 else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 664 else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 665 else if (op == MAT_ROWS_SORTED || 666 op == MAT_SYMMETRIC || 667 op == MAT_STRUCTURALLY_SYMMETRIC || 668 op == MAT_YES_NEW_DIAGONALS) 669 PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 670 else if (op == MAT_NO_NEW_DIAGONALS) 671 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:MAT_NO_NEW_DIAGONALS");} 672 else 673 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");} 674 return 0; 675 } 676 677 678 /* -------------------------------------------------------*/ 679 /* Should check that shapes of vectors and matrices match */ 680 /* -------------------------------------------------------*/ 681 #include "pinclude/plapack.h" 682 683 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz) 684 { 685 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 686 register Scalar *x,*z,*v,sum; 687 int mbs=a->mbs,i,*idx,*ii,n; 688 689 VecGetArray_Fast(xx,x); 690 VecGetArray_Fast(zz,z); 691 692 idx = a->j; 693 v = a->a; 694 ii = a->i; 695 696 for ( i=0; i<mbs; i++ ) { 697 n = ii[1] - ii[0]; ii++; 698 sum = 0.0; 699 while (n--) sum += *v++ * x[*idx++]; 700 z[i] = sum; 701 } 702 VecRestoreArray_Fast(xx,x); 703 VecRestoreArray_Fast(zz,z); 704 PLogFlops(2*a->nz - a->m); 705 return 0; 706 } 707 708 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz) 709 { 710 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 711 register Scalar *x,*z,*v,*xb,sum1,sum2; 712 register Scalar x1,x2; 713 int mbs=a->mbs,i,*idx,*ii; 714 int j,n; 715 716 VecGetArray_Fast(xx,x); 717 VecGetArray_Fast(zz,z); 718 719 idx = a->j; 720 v = a->a; 721 ii = a->i; 722 723 for ( i=0; i<mbs; i++ ) { 724 n = ii[1] - ii[0]; ii++; 725 sum1 = 0.0; sum2 = 0.0; 726 for ( j=0; j<n; j++ ) { 727 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 728 sum1 += v[0]*x1 + v[2]*x2; 729 sum2 += v[1]*x1 + v[3]*x2; 730 v += 4; 731 } 732 z[0] = sum1; z[1] = sum2; 733 z += 2; 734 } 735 VecRestoreArray_Fast(xx,x); 736 VecRestoreArray_Fast(zz,z); 737 PLogFlops(4*a->nz - a->m); 738 return 0; 739 } 740 741 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz) 742 { 743 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 744 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 745 int mbs=a->mbs,i,*idx,*ii,j,n; 746 747 VecGetArray_Fast(xx,x); 748 VecGetArray_Fast(zz,z); 749 750 idx = a->j; 751 v = a->a; 752 ii = a->i; 753 754 for ( i=0; i<mbs; i++ ) { 755 n = ii[1] - ii[0]; ii++; 756 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 757 for ( j=0; j<n; j++ ) { 758 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 759 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 760 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 761 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 762 v += 9; 763 } 764 z[0] = sum1; z[1] = sum2; z[2] = sum3; 765 z += 3; 766 } 767 VecRestoreArray_Fast(xx,x); 768 VecRestoreArray_Fast(zz,z); 769 PLogFlops(18*a->nz - a->m); 770 return 0; 771 } 772 773 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz) 774 { 775 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 776 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4; 777 register Scalar x1,x2,x3,x4; 778 int mbs=a->mbs,i,*idx,*ii; 779 int j,n; 780 781 VecGetArray_Fast(xx,x); 782 VecGetArray_Fast(zz,z); 783 784 idx = a->j; 785 v = a->a; 786 ii = a->i; 787 788 for ( i=0; i<mbs; i++ ) { 789 n = ii[1] - ii[0]; ii++; 790 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 791 for ( j=0; j<n; j++ ) { 792 xb = x + 4*(*idx++); 793 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 794 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 795 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 796 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 797 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 798 v += 16; 799 } 800 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 801 z += 4; 802 } 803 VecRestoreArray_Fast(xx,x); 804 VecRestoreArray_Fast(zz,z); 805 PLogFlops(32*a->nz - a->m); 806 return 0; 807 } 808 809 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz) 810 { 811 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 812 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 813 register Scalar x1,x2,x3,x4,x5; 814 int mbs=a->mbs,i,*idx,*ii,j,n; 815 816 VecGetArray_Fast(xx,x); 817 VecGetArray_Fast(zz,z); 818 819 idx = a->j; 820 v = a->a; 821 ii = a->i; 822 823 for ( i=0; i<mbs; i++ ) { 824 n = ii[1] - ii[0]; ii++; 825 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 826 for ( j=0; j<n; j++ ) { 827 xb = x + 5*(*idx++); 828 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 829 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 830 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 831 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 832 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 833 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 834 v += 25; 835 } 836 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 837 z += 5; 838 } 839 VecRestoreArray_Fast(xx,x); 840 VecRestoreArray_Fast(zz,z); 841 PLogFlops(50*a->nz - a->m); 842 return 0; 843 } 844 845 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz) 846 { 847 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 848 register Scalar *x,*z,*v,*xb; 849 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2; 850 int _One = 1,ncols,k; 851 Scalar _DOne = 1.0,*work,*workt,_DZero = 0.0; 852 853 VecGetArray_Fast(xx,x); 854 VecGetArray_Fast(zz,z); 855 856 idx = a->j; 857 v = a->a; 858 ii = a->i; 859 860 861 if (!a->mult_work) { 862 k = PetscMax(a->m,a->n); 863 a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work); 864 } 865 work = a->mult_work; 866 for ( i=0; i<mbs; i++ ) { 867 n = ii[1] - ii[0]; ii++; 868 ncols = n*bs; 869 workt = work; 870 for ( j=0; j<n; j++ ) { 871 xb = x + bs*(*idx++); 872 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 873 workt += bs; 874 } 875 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); 876 v += n*bs2; 877 z += bs; 878 } 879 VecRestoreArray_Fast(xx,x); 880 VecRestoreArray_Fast(zz,z); 881 PLogFlops(2*a->nz*bs2 - a->m); 882 return 0; 883 } 884 885 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 886 { 887 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 888 register Scalar *x,*y,*z,*v,sum; 889 int mbs=a->mbs,i,*idx,*ii,n; 890 891 VecGetArray_Fast(xx,x); 892 VecGetArray_Fast(yy,y); 893 VecGetArray_Fast(zz,z); 894 895 idx = a->j; 896 v = a->a; 897 ii = a->i; 898 899 for ( i=0; i<mbs; i++ ) { 900 n = ii[1] - ii[0]; ii++; 901 sum = y[i]; 902 while (n--) sum += *v++ * x[*idx++]; 903 z[i] = sum; 904 } 905 VecRestoreArray_Fast(xx,x); 906 VecRestoreArray_Fast(yy,y); 907 VecRestoreArray_Fast(zz,z); 908 PLogFlops(2*a->nz); 909 return 0; 910 } 911 912 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 913 { 914 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 915 register Scalar *x,*y,*z,*v,*xb,sum1,sum2; 916 register Scalar x1,x2; 917 int mbs=a->mbs,i,*idx,*ii; 918 int j,n; 919 920 VecGetArray_Fast(xx,x); 921 VecGetArray_Fast(yy,y); 922 VecGetArray_Fast(zz,z); 923 924 idx = a->j; 925 v = a->a; 926 ii = a->i; 927 928 for ( i=0; i<mbs; i++ ) { 929 n = ii[1] - ii[0]; ii++; 930 sum1 = y[0]; sum2 = y[1]; 931 for ( j=0; j<n; j++ ) { 932 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 933 sum1 += v[0]*x1 + v[2]*x2; 934 sum2 += v[1]*x1 + v[3]*x2; 935 v += 4; 936 } 937 z[0] = sum1; z[1] = sum2; 938 z += 2; y += 2; 939 } 940 VecRestoreArray_Fast(xx,x); 941 VecRestoreArray_Fast(yy,y); 942 VecRestoreArray_Fast(zz,z); 943 PLogFlops(4*a->nz); 944 return 0; 945 } 946 947 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 948 { 949 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 950 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 951 int mbs=a->mbs,i,*idx,*ii,j,n; 952 953 VecGetArray_Fast(xx,x); 954 VecGetArray_Fast(yy,y); 955 VecGetArray_Fast(zz,z); 956 957 idx = a->j; 958 v = a->a; 959 ii = a->i; 960 961 for ( i=0; i<mbs; i++ ) { 962 n = ii[1] - ii[0]; ii++; 963 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; 964 for ( j=0; j<n; j++ ) { 965 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 966 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 967 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 968 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 969 v += 9; 970 } 971 z[0] = sum1; z[1] = sum2; z[2] = sum3; 972 z += 3; y += 3; 973 } 974 VecRestoreArray_Fast(xx,x); 975 VecRestoreArray_Fast(yy,y); 976 VecRestoreArray_Fast(zz,z); 977 PLogFlops(18*a->nz); 978 return 0; 979 } 980 981 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 982 { 983 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 984 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4; 985 register Scalar x1,x2,x3,x4; 986 int mbs=a->mbs,i,*idx,*ii; 987 int j,n; 988 989 VecGetArray_Fast(xx,x); 990 VecGetArray_Fast(yy,y); 991 VecGetArray_Fast(zz,z); 992 993 idx = a->j; 994 v = a->a; 995 ii = a->i; 996 997 for ( i=0; i<mbs; i++ ) { 998 n = ii[1] - ii[0]; ii++; 999 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; 1000 for ( j=0; j<n; j++ ) { 1001 xb = x + 4*(*idx++); 1002 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1003 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 1004 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 1005 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 1006 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 1007 v += 16; 1008 } 1009 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 1010 z += 4; y += 4; 1011 } 1012 VecRestoreArray_Fast(xx,x); 1013 VecRestoreArray_Fast(yy,y); 1014 VecRestoreArray_Fast(zz,z); 1015 PLogFlops(32*a->nz); 1016 return 0; 1017 } 1018 1019 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 1020 { 1021 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1022 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 1023 register Scalar x1,x2,x3,x4,x5; 1024 int mbs=a->mbs,i,*idx,*ii,j,n; 1025 1026 VecGetArray_Fast(xx,x); 1027 VecGetArray_Fast(yy,y); 1028 VecGetArray_Fast(zz,z); 1029 1030 idx = a->j; 1031 v = a->a; 1032 ii = a->i; 1033 1034 for ( i=0; i<mbs; i++ ) { 1035 n = ii[1] - ii[0]; ii++; 1036 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; 1037 for ( j=0; j<n; j++ ) { 1038 xb = x + 5*(*idx++); 1039 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 1040 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1041 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1042 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1043 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1044 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 1045 v += 25; 1046 } 1047 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 1048 z += 5; y += 5; 1049 } 1050 VecRestoreArray_Fast(xx,x); 1051 VecRestoreArray_Fast(yy,y); 1052 VecRestoreArray_Fast(zz,z); 1053 PLogFlops(50*a->nz); 1054 return 0; 1055 } 1056 1057 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 1058 { 1059 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1060 register Scalar *x,*z,*v,*xb; 1061 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr; 1062 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1063 1064 if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1065 1066 VecGetArray_Fast(xx,x); 1067 VecGetArray_Fast(zz,z); 1068 1069 idx = a->j; 1070 v = a->a; 1071 ii = a->i; 1072 1073 1074 if (!a->mult_work) { 1075 k = PetscMax(a->m,a->n); 1076 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work); 1077 } 1078 work = a->mult_work; 1079 for ( i=0; i<mbs; i++ ) { 1080 n = ii[1] - ii[0]; ii++; 1081 ncols = n*bs; 1082 workt = work; 1083 for ( j=0; j<n; j++ ) { 1084 xb = x + bs*(*idx++); 1085 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1086 workt += bs; 1087 } 1088 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); 1089 v += n*bs2; 1090 z += bs; 1091 } 1092 VecRestoreArray_Fast(xx,x); 1093 VecRestoreArray_Fast(zz,z); 1094 PLogFlops(2*a->nz*bs2 ); 1095 return 0; 1096 } 1097 1098 static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz) 1099 { 1100 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1101 Scalar *xg,*zg,*zb; 1102 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1103 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1104 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1105 1106 1107 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 1108 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 1109 PetscMemzero(z,N*sizeof(Scalar)); 1110 1111 idx = a->j; 1112 v = a->a; 1113 ii = a->i; 1114 1115 switch (bs) { 1116 case 1: 1117 for ( i=0; i<mbs; i++ ) { 1118 n = ii[1] - ii[0]; ii++; 1119 xb = x + i; x1 = xb[0]; 1120 ib = idx + ai[i]; 1121 for ( j=0; j<n; j++ ) { 1122 rval = ib[j]; 1123 z[rval] += *v++ * x1; 1124 } 1125 } 1126 break; 1127 case 2: 1128 for ( i=0; i<mbs; i++ ) { 1129 n = ii[1] - ii[0]; ii++; 1130 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1131 ib = idx + ai[i]; 1132 for ( j=0; j<n; j++ ) { 1133 rval = ib[j]*2; 1134 z[rval++] += v[0]*x1 + v[1]*x2; 1135 z[rval++] += v[2]*x1 + v[3]*x2; 1136 v += 4; 1137 } 1138 } 1139 break; 1140 case 3: 1141 for ( i=0; i<mbs; i++ ) { 1142 n = ii[1] - ii[0]; ii++; 1143 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1144 ib = idx + ai[i]; 1145 for ( j=0; j<n; j++ ) { 1146 rval = ib[j]*3; 1147 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1148 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1149 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1150 v += 9; 1151 } 1152 } 1153 break; 1154 case 4: 1155 for ( i=0; i<mbs; i++ ) { 1156 n = ii[1] - ii[0]; ii++; 1157 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1158 ib = idx + ai[i]; 1159 for ( j=0; j<n; j++ ) { 1160 rval = ib[j]*4; 1161 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1162 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1163 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1164 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1165 v += 16; 1166 } 1167 } 1168 break; 1169 case 5: 1170 for ( i=0; i<mbs; i++ ) { 1171 n = ii[1] - ii[0]; ii++; 1172 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1173 x4 = xb[3]; x5 = xb[4]; 1174 ib = idx + ai[i]; 1175 for ( j=0; j<n; j++ ) { 1176 rval = ib[j]*5; 1177 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1178 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1179 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1180 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1181 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1182 v += 25; 1183 } 1184 } 1185 break; 1186 /* block sizes larger then 5 by 5 are handled by BLAS */ 1187 default: { 1188 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1189 if (!a->mult_work) { 1190 k = PetscMax(a->m,a->n); 1191 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1192 CHKPTRQ(a->mult_work); 1193 } 1194 work = a->mult_work; 1195 for ( i=0; i<mbs; i++ ) { 1196 n = ii[1] - ii[0]; ii++; 1197 ncols = n*bs; 1198 PetscMemzero(work,ncols*sizeof(Scalar)); 1199 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1200 v += n*bs2; 1201 x += bs; 1202 workt = work; 1203 for ( j=0; j<n; j++ ) { 1204 zb = z + bs*(*idx++); 1205 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1206 workt += bs; 1207 } 1208 } 1209 } 1210 } 1211 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1212 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1213 PLogFlops(2*a->nz*a->bs2 - a->n); 1214 return 0; 1215 } 1216 static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1217 { 1218 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1219 Scalar *xg,*zg,*zb; 1220 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1221 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1222 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1223 1224 1225 1226 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 1227 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 1228 1229 if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1230 else PetscMemzero(z,N*sizeof(Scalar)); 1231 1232 1233 idx = a->j; 1234 v = a->a; 1235 ii = a->i; 1236 1237 switch (bs) { 1238 case 1: 1239 for ( i=0; i<mbs; i++ ) { 1240 n = ii[1] - ii[0]; ii++; 1241 xb = x + i; x1 = xb[0]; 1242 ib = idx + ai[i]; 1243 for ( j=0; j<n; j++ ) { 1244 rval = ib[j]; 1245 z[rval] += *v++ * x1; 1246 } 1247 } 1248 break; 1249 case 2: 1250 for ( i=0; i<mbs; i++ ) { 1251 n = ii[1] - ii[0]; ii++; 1252 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1253 ib = idx + ai[i]; 1254 for ( j=0; j<n; j++ ) { 1255 rval = ib[j]*2; 1256 z[rval++] += v[0]*x1 + v[1]*x2; 1257 z[rval++] += v[2]*x1 + v[3]*x2; 1258 v += 4; 1259 } 1260 } 1261 break; 1262 case 3: 1263 for ( i=0; i<mbs; i++ ) { 1264 n = ii[1] - ii[0]; ii++; 1265 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1266 ib = idx + ai[i]; 1267 for ( j=0; j<n; j++ ) { 1268 rval = ib[j]*3; 1269 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1270 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1271 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1272 v += 9; 1273 } 1274 } 1275 break; 1276 case 4: 1277 for ( i=0; i<mbs; i++ ) { 1278 n = ii[1] - ii[0]; ii++; 1279 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1280 ib = idx + ai[i]; 1281 for ( j=0; j<n; j++ ) { 1282 rval = ib[j]*4; 1283 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1284 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1285 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1286 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1287 v += 16; 1288 } 1289 } 1290 break; 1291 case 5: 1292 for ( i=0; i<mbs; i++ ) { 1293 n = ii[1] - ii[0]; ii++; 1294 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1295 x4 = xb[3]; x5 = xb[4]; 1296 ib = idx + ai[i]; 1297 for ( j=0; j<n; j++ ) { 1298 rval = ib[j]*5; 1299 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1300 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1301 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1302 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1303 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1304 v += 25; 1305 } 1306 } 1307 break; 1308 /* block sizes larger then 5 by 5 are handled by BLAS */ 1309 default: { 1310 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1311 if (!a->mult_work) { 1312 k = PetscMax(a->m,a->n); 1313 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1314 CHKPTRQ(a->mult_work); 1315 } 1316 work = a->mult_work; 1317 for ( i=0; i<mbs; i++ ) { 1318 n = ii[1] - ii[0]; ii++; 1319 ncols = n*bs; 1320 PetscMemzero(work,ncols*sizeof(Scalar)); 1321 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1322 v += n*bs2; 1323 x += bs; 1324 workt = work; 1325 for ( j=0; j<n; j++ ) { 1326 zb = z + bs*(*idx++); 1327 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1328 workt += bs; 1329 } 1330 } 1331 } 1332 } 1333 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1334 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1335 PLogFlops(2*a->nz*a->bs2); 1336 return 0; 1337 } 1338 1339 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info) 1340 { 1341 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1342 1343 info->rows_global = (double)a->m; 1344 info->columns_global = (double)a->n; 1345 info->rows_local = (double)a->m; 1346 info->columns_local = (double)a->n; 1347 info->block_size = a->bs2; 1348 info->nz_allocated = a->maxnz; 1349 info->nz_used = a->bs2*a->nz; 1350 info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 1351 /* if (info->nz_unneeded != A->info.nz_unneeded) 1352 printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */ 1353 info->assemblies = A->num_ass; 1354 info->mallocs = a->reallocs; 1355 info->memory = A->mem; 1356 if (A->factor) { 1357 info->fill_ratio_given = A->info.fill_ratio_given; 1358 info->fill_ratio_needed = A->info.fill_ratio_needed; 1359 info->factor_mallocs = A->info.factor_mallocs; 1360 } else { 1361 info->fill_ratio_given = 0; 1362 info->fill_ratio_needed = 0; 1363 info->factor_mallocs = 0; 1364 } 1365 return 0; 1366 } 1367 1368 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 1369 { 1370 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 1371 1372 if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type"); 1373 1374 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1375 if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 1376 (a->nz != b->nz)) { 1377 *flg = PETSC_FALSE; return 0; 1378 } 1379 1380 /* if the a->i are the same */ 1381 if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 1382 *flg = PETSC_FALSE; return 0; 1383 } 1384 1385 /* if a->j are the same */ 1386 if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 1387 *flg = PETSC_FALSE; return 0; 1388 } 1389 1390 /* if a->a are the same */ 1391 if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 1392 *flg = PETSC_FALSE; return 0; 1393 } 1394 *flg = PETSC_TRUE; 1395 return 0; 1396 1397 } 1398 1399 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 1400 { 1401 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1402 int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 1403 Scalar *x, zero = 0.0,*aa,*aa_j; 1404 1405 bs = a->bs; 1406 aa = a->a; 1407 ai = a->i; 1408 aj = a->j; 1409 ambs = a->mbs; 1410 bs2 = a->bs2; 1411 1412 VecSet(&zero,v); 1413 VecGetArray(v,&x); VecGetLocalSize(v,&n); 1414 if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector"); 1415 for ( i=0; i<ambs; i++ ) { 1416 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1417 if (aj[j] == i) { 1418 row = i*bs; 1419 aa_j = aa+j*bs2; 1420 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 1421 break; 1422 } 1423 } 1424 } 1425 return 0; 1426 } 1427 1428 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 1429 { 1430 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1431 Scalar *l,*r,x,*v,*aa,*li,*ri; 1432 int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 1433 1434 ai = a->i; 1435 aj = a->j; 1436 aa = a->a; 1437 m = a->m; 1438 n = a->n; 1439 bs = a->bs; 1440 mbs = a->mbs; 1441 bs2 = a->bs2; 1442 if (ll) { 1443 VecGetArray(ll,&l); VecGetSize(ll,&lm); 1444 if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length"); 1445 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1446 M = ai[i+1] - ai[i]; 1447 li = l + i*bs; 1448 v = aa + bs2*ai[i]; 1449 for ( j=0; j<M; j++ ) { /* for each block */ 1450 for ( k=0; k<bs2; k++ ) { 1451 (*v++) *= li[k%bs]; 1452 } 1453 } 1454 } 1455 } 1456 1457 if (rr) { 1458 VecGetArray(rr,&r); VecGetSize(rr,&rn); 1459 if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length"); 1460 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1461 M = ai[i+1] - ai[i]; 1462 v = aa + bs2*ai[i]; 1463 for ( j=0; j<M; j++ ) { /* for each block */ 1464 ri = r + bs*aj[ai[i]+j]; 1465 for ( k=0; k<bs; k++ ) { 1466 x = ri[k]; 1467 for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 1468 } 1469 } 1470 } 1471 } 1472 return 0; 1473 } 1474 1475 1476 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 1477 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 1478 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1479 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*); 1480 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**); 1481 1482 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 1483 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 1484 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 1485 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 1486 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 1487 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 1488 1489 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 1490 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 1491 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 1492 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 1493 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 1494 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 1495 1496 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 1497 { 1498 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1499 Scalar *v = a->a; 1500 double sum = 0.0; 1501 int i,nz=a->nz,bs2=a->bs2; 1502 1503 if (type == NORM_FROBENIUS) { 1504 for (i=0; i< bs2*nz; i++ ) { 1505 #if defined(PETSC_COMPLEX) 1506 sum += real(conj(*v)*(*v)); v++; 1507 #else 1508 sum += (*v)*(*v); v++; 1509 #endif 1510 } 1511 *norm = sqrt(sum); 1512 } 1513 else { 1514 SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet"); 1515 } 1516 return 0; 1517 } 1518 1519 /* 1520 note: This can only work for identity for row and col. It would 1521 be good to check this and otherwise generate an error. 1522 */ 1523 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 1524 { 1525 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1526 Mat outA; 1527 int ierr; 1528 1529 if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported"); 1530 1531 outA = inA; 1532 inA->factor = FACTOR_LU; 1533 a->row = row; 1534 a->col = col; 1535 1536 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1537 1538 if (!a->diag) { 1539 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 1540 } 1541 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 1542 return 0; 1543 } 1544 1545 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 1546 { 1547 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1548 int one = 1, totalnz = a->bs2*a->nz; 1549 BLscal_( &totalnz, alpha, a->a, &one ); 1550 PLogFlops(totalnz); 1551 return 0; 1552 } 1553 1554 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 1555 { 1556 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1557 int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 1558 int *ai = a->i, *ailen = a->ilen; 1559 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 1560 Scalar *ap, *aa = a->a, zero = 0.0; 1561 1562 for ( k=0; k<m; k++ ) { /* loop over rows */ 1563 row = im[k]; brow = row/bs; 1564 if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row"); 1565 if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large"); 1566 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1567 nrow = ailen[brow]; 1568 for ( l=0; l<n; l++ ) { /* loop over columns */ 1569 if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column"); 1570 if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large"); 1571 col = in[l] ; 1572 bcol = col/bs; 1573 cidx = col%bs; 1574 ridx = row%bs; 1575 high = nrow; 1576 low = 0; /* assume unsorted */ 1577 while (high-low > 5) { 1578 t = (low+high)/2; 1579 if (rp[t] > bcol) high = t; 1580 else low = t; 1581 } 1582 for ( i=low; i<high; i++ ) { 1583 if (rp[i] > bcol) break; 1584 if (rp[i] == bcol) { 1585 *v++ = ap[bs2*i+bs*cidx+ridx]; 1586 goto finished; 1587 } 1588 } 1589 *v++ = zero; 1590 finished:; 1591 } 1592 } 1593 return 0; 1594 } 1595 1596 static int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs) 1597 { 1598 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data; 1599 *bs = baij->bs; 1600 return 0; 1601 } 1602 1603 /* idx should be of length atlease bs */ 1604 static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg) 1605 { 1606 int i,row; 1607 row = idx[0]; 1608 if (row%bs!=0) { *flg = PETSC_FALSE; return 0; } 1609 1610 for ( i=1; i<bs; i++ ) { 1611 if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; } 1612 } 1613 *flg = PETSC_TRUE; 1614 return 0; 1615 } 1616 1617 static int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag) 1618 { 1619 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1620 IS is_local; 1621 int ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2; 1622 PetscTruth flg; 1623 Scalar zero = 0.0,*aa; 1624 1625 /* Make a copy of the IS and sort it */ 1626 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 1627 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1628 ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr); 1629 ierr = ISSort(is_local); CHKERRQ(ierr); 1630 ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr); 1631 1632 i = 0; 1633 while (i < is_n) { 1634 if (rows[i]<0 || rows[i]>m) SETERRQ(1,"MatZeroRows_SeqBAIJ:row out of range"); 1635 flg = PETSC_FALSE; 1636 if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); } 1637 if (flg) { /* There exists a block of rows to be Zerowed */ 1638 baij->ilen[rows[i]/bs] = 0; 1639 i += bs; 1640 } else { /* Zero out only the requested row */ 1641 count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs; 1642 aa = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs); 1643 for ( j=0; j<count; j++ ) { 1644 aa[0] = zero; 1645 aa+=bs; 1646 } 1647 i++; 1648 } 1649 } 1650 if (diag) { 1651 for ( j=0; j<is_n; j++ ) { 1652 ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1653 } 1654 } 1655 ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr); 1656 ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr); 1657 ierr = ISDestroy(is_local); CHKERRQ(ierr); 1658 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1659 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1660 1661 return 0; 1662 } 1663 int MatPrintHelp_SeqBAIJ(Mat A) 1664 { 1665 static int called = 0; 1666 MPI_Comm comm = A->comm; 1667 1668 if (called) return 0; else called = 1; 1669 PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n"); 1670 PetscPrintf(comm," -mat_block_size <block_size>\n"); 1671 return 0; 1672 } 1673 1674 /* -------------------------------------------------------------------*/ 1675 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 1676 MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 1677 MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N, 1678 MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ, 1679 MatSolve_SeqBAIJ_N,0, 1680 0,0, 1681 MatLUFactor_SeqBAIJ,0, 1682 0, 1683 MatTranspose_SeqBAIJ, 1684 MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 1685 MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 1686 0,MatAssemblyEnd_SeqBAIJ, 1687 0, 1688 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ, 1689 MatGetReordering_SeqBAIJ, 1690 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 1691 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 1692 MatILUFactorSymbolic_SeqBAIJ,0, 1693 0,0,/* MatConvert_SeqBAIJ */ 0, 1694 MatConvertSameType_SeqBAIJ,0,0, 1695 MatILUFactor_SeqBAIJ,0,0, 1696 MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ, 1697 MatGetValues_SeqBAIJ,0, 1698 MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 1699 0,0,0,MatGetBlockSize_SeqBAIJ}; 1700 1701 /*@C 1702 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1703 compressed row) format. For good matrix assembly performance the 1704 user should preallocate the matrix storage by setting the parameter nz 1705 (or the array nzz). By setting these parameters accurately, performance 1706 during matrix assembly can be increased by more than a factor of 50. 1707 1708 Input Parameters: 1709 . comm - MPI communicator, set to MPI_COMM_SELF 1710 . bs - size of block 1711 . m - number of rows 1712 . n - number of columns 1713 . nz - number of block nonzeros per block row (same for all rows) 1714 . nzz - array containing the number of block nonzeros in the various block rows 1715 (possibly different for each block row) or PETSC_NULL 1716 1717 Output Parameter: 1718 . A - the matrix 1719 1720 Notes: 1721 The block AIJ format is fully compatible with standard Fortran 77 1722 storage. That is, the stored row and column indices can begin at 1723 either one (as in Fortran) or zero. See the users' manual for details. 1724 1725 Specify the preallocated storage with either nz or nnz (not both). 1726 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1727 allocation. For additional details, see the users manual chapter on 1728 matrices and the file $(PETSC_DIR)/Performance. 1729 1730 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1731 @*/ 1732 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 1733 { 1734 Mat B; 1735 Mat_SeqBAIJ *b; 1736 int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs; 1737 1738 if (mbs*bs!=m || nbs*bs!=n) 1739 SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize"); 1740 1741 *A = 0; 1742 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 1743 PLogObjectCreate(B); 1744 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 1745 PetscMemzero(b,sizeof(Mat_SeqBAIJ)); 1746 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 1747 ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr); 1748 if (!flg) { 1749 switch (bs) { 1750 case 1: 1751 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1752 B->ops.solve = MatSolve_SeqBAIJ_1; 1753 B->ops.mult = MatMult_SeqBAIJ_1; 1754 B->ops.multadd = MatMultAdd_SeqBAIJ_1; 1755 break; 1756 case 2: 1757 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1758 B->ops.solve = MatSolve_SeqBAIJ_2; 1759 B->ops.mult = MatMult_SeqBAIJ_2; 1760 B->ops.multadd = MatMultAdd_SeqBAIJ_2; 1761 break; 1762 case 3: 1763 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1764 B->ops.solve = MatSolve_SeqBAIJ_3; 1765 B->ops.mult = MatMult_SeqBAIJ_3; 1766 B->ops.multadd = MatMultAdd_SeqBAIJ_3; 1767 break; 1768 case 4: 1769 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1770 B->ops.solve = MatSolve_SeqBAIJ_4; 1771 B->ops.mult = MatMult_SeqBAIJ_4; 1772 B->ops.multadd = MatMultAdd_SeqBAIJ_4; 1773 break; 1774 case 5: 1775 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1776 B->ops.solve = MatSolve_SeqBAIJ_5; 1777 B->ops.mult = MatMult_SeqBAIJ_5; 1778 B->ops.multadd = MatMultAdd_SeqBAIJ_5; 1779 break; 1780 } 1781 } 1782 B->destroy = MatDestroy_SeqBAIJ; 1783 B->view = MatView_SeqBAIJ; 1784 B->factor = 0; 1785 B->lupivotthreshold = 1.0; 1786 b->row = 0; 1787 b->col = 0; 1788 b->reallocs = 0; 1789 1790 b->m = m; B->m = m; B->M = m; 1791 b->n = n; B->n = n; B->N = n; 1792 b->mbs = mbs; 1793 b->nbs = nbs; 1794 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 1795 if (nnz == PETSC_NULL) { 1796 if (nz == PETSC_DEFAULT) nz = 5; 1797 else if (nz <= 0) nz = 1; 1798 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 1799 nz = nz*mbs; 1800 } 1801 else { 1802 nz = 0; 1803 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 1804 } 1805 1806 /* allocate the matrix space */ 1807 len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 1808 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 1809 PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 1810 b->j = (int *) (b->a + nz*bs2); 1811 PetscMemzero(b->j,nz*sizeof(int)); 1812 b->i = b->j + nz; 1813 b->singlemalloc = 1; 1814 1815 b->i[0] = 0; 1816 for (i=1; i<mbs+1; i++) { 1817 b->i[i] = b->i[i-1] + b->imax[i-1]; 1818 } 1819 1820 /* b->ilen will count nonzeros in each block row so far. */ 1821 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 1822 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1823 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 1824 1825 b->bs = bs; 1826 b->bs2 = bs2; 1827 b->mbs = mbs; 1828 b->nz = 0; 1829 b->maxnz = nz*bs2; 1830 b->sorted = 0; 1831 b->roworiented = 1; 1832 b->nonew = 0; 1833 b->diag = 0; 1834 b->solve_work = 0; 1835 b->mult_work = 0; 1836 b->spptr = 0; 1837 B->info.nz_unneeded = (double)b->maxnz; 1838 1839 *A = B; 1840 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 1841 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 1842 return 0; 1843 } 1844 1845 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 1846 { 1847 Mat C; 1848 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 1849 int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1850 1851 if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix"); 1852 1853 *B = 0; 1854 PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 1855 PLogObjectCreate(C); 1856 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 1857 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 1858 C->destroy = MatDestroy_SeqBAIJ; 1859 C->view = MatView_SeqBAIJ; 1860 C->factor = A->factor; 1861 c->row = 0; 1862 c->col = 0; 1863 C->assembled = PETSC_TRUE; 1864 1865 c->m = C->m = a->m; 1866 c->n = C->n = a->n; 1867 C->M = a->m; 1868 C->N = a->n; 1869 1870 c->bs = a->bs; 1871 c->bs2 = a->bs2; 1872 c->mbs = a->mbs; 1873 c->nbs = a->nbs; 1874 1875 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 1876 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 1877 for ( i=0; i<mbs; i++ ) { 1878 c->imax[i] = a->imax[i]; 1879 c->ilen[i] = a->ilen[i]; 1880 } 1881 1882 /* allocate the matrix space */ 1883 c->singlemalloc = 1; 1884 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 1885 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 1886 c->j = (int *) (c->a + nz*bs2); 1887 c->i = c->j + nz; 1888 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 1889 if (mbs > 0) { 1890 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 1891 if (cpvalues == COPY_VALUES) { 1892 PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 1893 } 1894 } 1895 1896 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1897 c->sorted = a->sorted; 1898 c->roworiented = a->roworiented; 1899 c->nonew = a->nonew; 1900 1901 if (a->diag) { 1902 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 1903 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 1904 for ( i=0; i<mbs; i++ ) { 1905 c->diag[i] = a->diag[i]; 1906 } 1907 } 1908 else c->diag = 0; 1909 c->nz = a->nz; 1910 c->maxnz = a->maxnz; 1911 c->solve_work = 0; 1912 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1913 c->mult_work = 0; 1914 *B = C; 1915 return 0; 1916 } 1917 1918 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 1919 { 1920 Mat_SeqBAIJ *a; 1921 Mat B; 1922 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 1923 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1924 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1925 int *masked, nmask,tmp,bs2,ishift; 1926 Scalar *aa; 1927 MPI_Comm comm = ((PetscObject) viewer)->comm; 1928 1929 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 1930 bs2 = bs*bs; 1931 1932 MPI_Comm_size(comm,&size); 1933 if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor"); 1934 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1935 ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 1936 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object"); 1937 M = header[1]; N = header[2]; nz = header[3]; 1938 1939 if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 1940 1941 /* 1942 This code adds extra rows to make sure the number of rows is 1943 divisible by the blocksize 1944 */ 1945 mbs = M/bs; 1946 extra_rows = bs - M + bs*(mbs); 1947 if (extra_rows == bs) extra_rows = 0; 1948 else mbs++; 1949 if (extra_rows) { 1950 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1951 } 1952 1953 /* read in row lengths */ 1954 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 1955 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 1956 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 1957 1958 /* read in column indices */ 1959 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 1960 ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 1961 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 1962 1963 /* loop over row lengths determining block row lengths */ 1964 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 1965 PetscMemzero(browlengths,mbs*sizeof(int)); 1966 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 1967 PetscMemzero(mask,mbs*sizeof(int)); 1968 masked = mask + mbs; 1969 rowcount = 0; nzcount = 0; 1970 for ( i=0; i<mbs; i++ ) { 1971 nmask = 0; 1972 for ( j=0; j<bs; j++ ) { 1973 kmax = rowlengths[rowcount]; 1974 for ( k=0; k<kmax; k++ ) { 1975 tmp = jj[nzcount++]/bs; 1976 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1977 } 1978 rowcount++; 1979 } 1980 browlengths[i] += nmask; 1981 /* zero out the mask elements we set */ 1982 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1983 } 1984 1985 /* create our matrix */ 1986 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 1987 CHKERRQ(ierr); 1988 B = *A; 1989 a = (Mat_SeqBAIJ *) B->data; 1990 1991 /* set matrix "i" values */ 1992 a->i[0] = 0; 1993 for ( i=1; i<= mbs; i++ ) { 1994 a->i[i] = a->i[i-1] + browlengths[i-1]; 1995 a->ilen[i-1] = browlengths[i-1]; 1996 } 1997 a->nz = 0; 1998 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 1999 2000 /* read in nonzero values */ 2001 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 2002 ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 2003 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 2004 2005 /* set "a" and "j" values into matrix */ 2006 nzcount = 0; jcount = 0; 2007 for ( i=0; i<mbs; i++ ) { 2008 nzcountb = nzcount; 2009 nmask = 0; 2010 for ( j=0; j<bs; j++ ) { 2011 kmax = rowlengths[i*bs+j]; 2012 for ( k=0; k<kmax; k++ ) { 2013 tmp = jj[nzcount++]/bs; 2014 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2015 } 2016 rowcount++; 2017 } 2018 /* sort the masked values */ 2019 PetscSortInt(nmask,masked); 2020 2021 /* set "j" values into matrix */ 2022 maskcount = 1; 2023 for ( j=0; j<nmask; j++ ) { 2024 a->j[jcount++] = masked[j]; 2025 mask[masked[j]] = maskcount++; 2026 } 2027 /* set "a" values into matrix */ 2028 ishift = bs2*a->i[i]; 2029 for ( j=0; j<bs; j++ ) { 2030 kmax = rowlengths[i*bs+j]; 2031 for ( k=0; k<kmax; k++ ) { 2032 tmp = jj[nzcountb]/bs ; 2033 block = mask[tmp] - 1; 2034 point = jj[nzcountb] - bs*tmp; 2035 idx = ishift + bs2*block + j + bs*point; 2036 a->a[idx] = aa[nzcountb++]; 2037 } 2038 } 2039 /* zero out the mask elements we set */ 2040 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2041 } 2042 if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix"); 2043 2044 PetscFree(rowlengths); 2045 PetscFree(browlengths); 2046 PetscFree(aa); 2047 PetscFree(jj); 2048 PetscFree(mask); 2049 2050 B->assembled = PETSC_TRUE; 2051 2052 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 2053 if (flg) { 2054 Viewer tviewer; 2055 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2056 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr); 2057 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2058 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2059 } 2060 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 2061 if (flg) { 2062 Viewer tviewer; 2063 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2064 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr); 2065 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2066 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2067 } 2068 ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 2069 if (flg) { 2070 Viewer tviewer; 2071 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2072 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2073 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2074 } 2075 ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 2076 if (flg) { 2077 Viewer tviewer; 2078 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2079 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr); 2080 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2081 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2082 } 2083 ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 2084 if (flg) { 2085 Viewer tviewer; 2086 ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 2087 ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 2088 ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 2089 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2090 } 2091 return 0; 2092 } 2093 2094 2095 2096