1 2 #ifndef lint 3 static char vcid[] = "$Id: baij.c,v 1.61 1996/07/31 20:25:01 balay Exp balay $"; 4 #endif 5 6 /* 7 Defines the basic matrix operations for the BAIJ (compressed row) 8 matrix storage format. 9 */ 10 #include "baij.h" 11 #include "src/vec/vecimpl.h" 12 #include "src/inline/spops.h" 13 #include "petsc.h" 14 15 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 16 17 static int MatGetReordering_SeqBAIJ(Mat A,MatReordering type,IS *rperm,IS *cperm) 18 { 19 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 20 int ierr, *ia, *ja,n = a->mbs,*idx,i,ishift,oshift; 21 22 /* 23 this is tacky: In the future when we have written special factorization 24 and solve routines for the identity permutation we should use a 25 stride index set instead of the general one. 26 */ 27 if (type == ORDER_NATURAL) { 28 idx = (int *) PetscMalloc( n*sizeof(int) ); CHKPTRQ(idx); 29 for ( i=0; i<n; i++ ) idx[i] = i; 30 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,rperm); CHKERRQ(ierr); 31 ierr = ISCreateSeq(MPI_COMM_SELF,n,idx,cperm); CHKERRQ(ierr); 32 PetscFree(idx); 33 ISSetPermutation(*rperm); 34 ISSetPermutation(*cperm); 35 ISSetIdentity(*rperm); 36 ISSetIdentity(*cperm); 37 return 0; 38 } 39 40 MatReorderingRegisterAll(); 41 ishift = 0; 42 oshift = -MatReorderingIndexShift[(int)type]; 43 if (MatReorderingRequiresSymmetric[(int)type]) { 44 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,ishift,oshift,&ia,&ja);CHKERRQ(ierr); 45 ierr = MatGetReordering_IJ(n,ia,ja,type,rperm,cperm); CHKERRQ(ierr); 46 PetscFree(ia); PetscFree(ja); 47 } else { 48 if (ishift == oshift) { 49 ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 50 } 51 else if (ishift == -1) { 52 /* temporarily subtract 1 from i and j indices */ 53 int nz = a->i[n] - 1; 54 for ( i=0; i<nz; i++ ) a->j[i]--; 55 for ( i=0; i<n+1; i++ ) a->i[i]--; 56 ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 57 for ( i=0; i<nz; i++ ) a->j[i]++; 58 for ( i=0; i<n+1; i++ ) a->i[i]++; 59 } else { 60 /* temporarily add 1 to i and j indices */ 61 int nz = a->i[n] - 1; 62 for ( i=0; i<nz; i++ ) a->j[i]++; 63 for ( i=0; i<n+1; i++ ) a->i[i]++; 64 ierr = MatGetReordering_IJ(n,a->i,a->j,type,rperm,cperm);CHKERRQ(ierr); 65 for ( i=0; i<nz; i++ ) a->j[i]--; 66 for ( i=0; i<n+1; i++ ) a->i[i]--; 67 } 68 } 69 return 0; 70 } 71 72 /* 73 Adds diagonal pointers to sparse matrix structure. 74 */ 75 76 int MatMarkDiag_SeqBAIJ(Mat A) 77 { 78 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 79 int i,j, *diag, m = a->mbs; 80 81 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 82 PLogObjectMemory(A,(m+1)*sizeof(int)); 83 for ( i=0; i<m; i++ ) { 84 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 85 if (a->j[j] == i) { 86 diag[i] = j; 87 break; 88 } 89 } 90 } 91 a->diag = diag; 92 return 0; 93 } 94 95 #include "draw.h" 96 #include "pinclude/pviewer.h" 97 #include "sys.h" 98 99 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 100 { 101 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 102 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 103 Scalar *aa; 104 105 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 106 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 107 col_lens[0] = MAT_COOKIE; 108 col_lens[1] = a->m; 109 col_lens[2] = a->n; 110 col_lens[3] = a->nz*bs2; 111 112 /* store lengths of each row and write (including header) to file */ 113 count = 0; 114 for ( i=0; i<a->mbs; i++ ) { 115 for ( j=0; j<bs; j++ ) { 116 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 117 } 118 } 119 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 120 PetscFree(col_lens); 121 122 /* store column indices (zero start index) */ 123 jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj); 124 count = 0; 125 for ( i=0; i<a->mbs; i++ ) { 126 for ( j=0; j<bs; j++ ) { 127 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 128 for ( l=0; l<bs; l++ ) { 129 jj[count++] = bs*a->j[k] + l; 130 } 131 } 132 } 133 } 134 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr); 135 PetscFree(jj); 136 137 /* store nonzero values */ 138 aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa); 139 count = 0; 140 for ( i=0; i<a->mbs; i++ ) { 141 for ( j=0; j<bs; j++ ) { 142 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 143 for ( l=0; l<bs; l++ ) { 144 aa[count++] = a->a[bs2*k + l*bs + j]; 145 } 146 } 147 } 148 } 149 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 150 PetscFree(aa); 151 return 0; 152 } 153 154 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 155 { 156 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 157 int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 158 FILE *fd; 159 char *outputname; 160 161 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 162 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 163 ierr = ViewerGetFormat(viewer,&format); 164 if (format == ASCII_FORMAT_INFO || format == ASCII_FORMAT_INFO_DETAILED) { 165 fprintf(fd," block size is %d\n",bs); 166 } 167 else if (format == ASCII_FORMAT_MATLAB) { 168 SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported"); 169 } 170 else if (format == ASCII_FORMAT_COMMON) { 171 for ( i=0; i<a->mbs; i++ ) { 172 for ( j=0; j<bs; j++ ) { 173 fprintf(fd,"row %d:",i*bs+j); 174 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 175 for ( l=0; l<bs; l++ ) { 176 #if defined(PETSC_COMPLEX) 177 if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 178 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 179 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 180 else if (real(a->a[bs2*k + l*bs + j]) != 0.0) 181 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 182 #else 183 if (a->a[bs2*k + l*bs + j] != 0.0) 184 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 185 #endif 186 } 187 } 188 fprintf(fd,"\n"); 189 } 190 } 191 } 192 else { 193 for ( i=0; i<a->mbs; i++ ) { 194 for ( j=0; j<bs; j++ ) { 195 fprintf(fd,"row %d:",i*bs+j); 196 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 197 for ( l=0; l<bs; l++ ) { 198 #if defined(PETSC_COMPLEX) 199 if (imag(a->a[bs2*k + l*bs + j]) != 0.0) { 200 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 201 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 202 } 203 else { 204 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 205 } 206 #else 207 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 208 #endif 209 } 210 } 211 fprintf(fd,"\n"); 212 } 213 } 214 } 215 fflush(fd); 216 return 0; 217 } 218 219 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer) 220 { 221 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 222 int row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2; 223 double xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r; 224 Scalar *aa; 225 Draw draw; 226 DrawButton button; 227 PetscTruth isnull; 228 229 ViewerDrawGetDraw(viewer,&draw); 230 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 231 232 xr = a->n; yr = a->m; h = yr/10.0; w = xr/10.0; 233 xr += w; yr += h; xl = -w; yl = -h; 234 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 235 /* loop over matrix elements drawing boxes */ 236 color = DRAW_BLUE; 237 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 238 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 239 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 240 x_l = a->j[j]*bs; x_r = x_l + 1.0; 241 aa = a->a + j*bs2; 242 for ( k=0; k<bs; k++ ) { 243 for ( l=0; l<bs; l++ ) { 244 if (PetscReal(*aa++) >= 0.) continue; 245 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 246 } 247 } 248 } 249 } 250 color = DRAW_CYAN; 251 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 252 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 253 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 254 x_l = a->j[j]*bs; x_r = x_l + 1.0; 255 aa = a->a + j*bs2; 256 for ( k=0; k<bs; k++ ) { 257 for ( l=0; l<bs; l++ ) { 258 if (PetscReal(*aa++) != 0.) continue; 259 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 260 } 261 } 262 } 263 } 264 265 color = DRAW_RED; 266 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 267 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 268 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 269 x_l = a->j[j]*bs; x_r = x_l + 1.0; 270 aa = a->a + j*bs2; 271 for ( k=0; k<bs; k++ ) { 272 for ( l=0; l<bs; l++ ) { 273 if (PetscReal(*aa++) <= 0.) continue; 274 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 275 } 276 } 277 } 278 } 279 280 DrawFlush(draw); 281 DrawGetPause(draw,&pause); 282 if (pause >= 0) { PetscSleep(pause); return 0;} 283 284 /* allow the matrix to zoom or shrink */ 285 ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 286 while (button != BUTTON_RIGHT) { 287 DrawClear(draw); 288 if (button == BUTTON_LEFT) scale = .5; 289 else if (button == BUTTON_CENTER) scale = 2.; 290 xl = scale*(xl + w - xc) + xc - w*scale; 291 xr = scale*(xr - w - xc) + xc + w*scale; 292 yl = scale*(yl + h - yc) + yc - h*scale; 293 yr = scale*(yr - h - yc) + yc + h*scale; 294 w *= scale; h *= scale; 295 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 296 color = DRAW_BLUE; 297 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 298 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 299 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 300 x_l = a->j[j]*bs; x_r = x_l + 1.0; 301 aa = a->a + j*bs2; 302 for ( k=0; k<bs; k++ ) { 303 for ( l=0; l<bs; l++ ) { 304 if (PetscReal(*aa++) >= 0.) continue; 305 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 306 } 307 } 308 } 309 } 310 color = DRAW_CYAN; 311 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 312 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 313 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 314 x_l = a->j[j]*bs; x_r = x_l + 1.0; 315 aa = a->a + j*bs2; 316 for ( k=0; k<bs; k++ ) { 317 for ( l=0; l<bs; l++ ) { 318 if (PetscReal(*aa++) != 0.) continue; 319 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 320 } 321 } 322 } 323 } 324 325 color = DRAW_RED; 326 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 327 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 328 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 329 x_l = a->j[j]*bs; x_r = x_l + 1.0; 330 aa = a->a + j*bs2; 331 for ( k=0; k<bs; k++ ) { 332 for ( l=0; l<bs; l++ ) { 333 if (PetscReal(*aa++) <= 0.) continue; 334 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 335 } 336 } 337 } 338 } 339 ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 340 } 341 return 0; 342 } 343 344 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 345 { 346 Mat A = (Mat) obj; 347 ViewerType vtype; 348 int ierr; 349 350 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 351 if (vtype == MATLAB_VIEWER) { 352 SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported"); 353 } 354 else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 355 return MatView_SeqBAIJ_ASCII(A,viewer); 356 } 357 else if (vtype == BINARY_FILE_VIEWER) { 358 return MatView_SeqBAIJ_Binary(A,viewer); 359 } 360 else if (vtype == DRAW_VIEWER) { 361 return MatView_SeqBAIJ_Draw(A,viewer); 362 } 363 return 0; 364 } 365 366 #define CHUNKSIZE 10 367 368 /* This version has row oriented v */ 369 static int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 370 { 371 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 372 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 373 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 374 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 375 int ridx,cidx,bs2=a->bs2; 376 Scalar *ap,value,*aa=a->a,*bap; 377 378 for ( k=0; k<m; k++ ) { /* loop over added rows */ 379 row = im[k]; brow = row/bs; 380 if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row"); 381 if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large"); 382 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 383 rmax = imax[brow]; nrow = ailen[brow]; 384 low = 0; 385 for ( l=0; l<n; l++ ) { /* loop over added columns */ 386 if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column"); 387 if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large"); 388 col = in[l]; bcol = col/bs; 389 ridx = row % bs; cidx = col % bs; 390 if (roworiented) { 391 value = *v++; 392 } 393 else { 394 value = v[k + l*m]; 395 } 396 if (!sorted) low = 0; high = nrow; 397 while (high-low > 5) { 398 t = (low+high)/2; 399 if (rp[t] > bcol) high = t; 400 else low = t; 401 } 402 for ( i=low; i<high; i++ ) { 403 if (rp[i] > bcol) break; 404 if (rp[i] == bcol) { 405 bap = ap + bs2*i + bs*cidx + ridx; 406 if (is == ADD_VALUES) *bap += value; 407 else *bap = value; 408 goto noinsert; 409 } 410 } 411 if (nonew) goto noinsert; 412 if (nrow >= rmax) { 413 /* there is no extra room in row, therefore enlarge */ 414 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 415 Scalar *new_a; 416 417 /* malloc new storage space */ 418 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 419 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 420 new_j = (int *) (new_a + bs2*new_nz); 421 new_i = new_j + new_nz; 422 423 /* copy over old data into new slots */ 424 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 425 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 426 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); 427 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 428 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, 429 len*sizeof(int)); 430 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); 431 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 432 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), 433 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); 434 /* free up old matrix storage */ 435 PetscFree(a->a); 436 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 437 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 438 a->singlemalloc = 1; 439 440 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 441 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 442 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 443 a->maxnz += bs2*CHUNKSIZE; 444 a->reallocs++; 445 a->nz++; 446 } 447 N = nrow++ - 1; 448 /* shift up all the later entries in this row */ 449 for ( ii=N; ii>=i; ii-- ) { 450 rp[ii+1] = rp[ii]; 451 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 452 } 453 if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 454 rp[i] = bcol; 455 ap[bs2*i + bs*cidx + ridx] = value; 456 noinsert:; 457 low = i; 458 } 459 ailen[brow] = nrow; 460 } 461 return 0; 462 } 463 464 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 465 { 466 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 467 *m = a->m; *n = a->n; 468 return 0; 469 } 470 471 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 472 { 473 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 474 *m = 0; *n = a->m; 475 return 0; 476 } 477 478 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 479 { 480 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 481 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 482 Scalar *aa,*v_i,*aa_i; 483 484 bs = a->bs; 485 ai = a->i; 486 aj = a->j; 487 aa = a->a; 488 bs2 = a->bs2; 489 490 if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range"); 491 492 bn = row/bs; /* Block number */ 493 bp = row % bs; /* Block Position */ 494 M = ai[bn+1] - ai[bn]; 495 *nz = bs*M; 496 497 if (v) { 498 *v = 0; 499 if (*nz) { 500 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 501 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 502 v_i = *v + i*bs; 503 aa_i = aa + bs2*(ai[bn] + i); 504 for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 505 } 506 } 507 } 508 509 if (idx) { 510 *idx = 0; 511 if (*nz) { 512 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 513 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 514 idx_i = *idx + i*bs; 515 itmp = bs*aj[ai[bn] + i]; 516 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 517 } 518 } 519 } 520 return 0; 521 } 522 523 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 524 { 525 if (idx) {if (*idx) PetscFree(*idx);} 526 if (v) {if (*v) PetscFree(*v);} 527 return 0; 528 } 529 530 static int MatTranspose_SeqBAIJ(Mat A,Mat *B) 531 { 532 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 533 Mat C; 534 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 535 int *rows,*cols,bs2=a->bs2; 536 Scalar *array=a->a; 537 538 if (B==PETSC_NULL && mbs!=nbs) 539 SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place"); 540 col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 541 PetscMemzero(col,(1+nbs)*sizeof(int)); 542 543 for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 544 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 545 PetscFree(col); 546 rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 547 cols = rows + bs; 548 for ( i=0; i<mbs; i++ ) { 549 cols[0] = i*bs; 550 for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 551 len = ai[i+1] - ai[i]; 552 for ( j=0; j<len; j++ ) { 553 rows[0] = (*aj++)*bs; 554 for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 555 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 556 array += bs2; 557 } 558 } 559 PetscFree(rows); 560 561 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 562 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 563 564 if (B != PETSC_NULL) { 565 *B = C; 566 } else { 567 /* This isn't really an in-place transpose */ 568 PetscFree(a->a); 569 if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 570 if (a->diag) PetscFree(a->diag); 571 if (a->ilen) PetscFree(a->ilen); 572 if (a->imax) PetscFree(a->imax); 573 if (a->solve_work) PetscFree(a->solve_work); 574 PetscFree(a); 575 PetscMemcpy(A,C,sizeof(struct _Mat)); 576 PetscHeaderDestroy(C); 577 } 578 return 0; 579 } 580 581 582 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 583 { 584 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 585 int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 586 int m = a->m,*ip, N, *ailen = a->ilen; 587 int mbs = a->mbs, bs2 = a->bs2; 588 Scalar *aa = a->a, *ap; 589 590 if (mode == MAT_FLUSH_ASSEMBLY) return 0; 591 592 for ( i=1; i<mbs; i++ ) { 593 /* move each row back by the amount of empty slots (fshift) before it*/ 594 fshift += imax[i-1] - ailen[i-1]; 595 if (fshift) { 596 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 597 N = ailen[i]; 598 for ( j=0; j<N; j++ ) { 599 ip[j-fshift] = ip[j]; 600 PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 601 } 602 } 603 ai[i] = ai[i-1] + ailen[i-1]; 604 } 605 if (mbs) { 606 fshift += imax[mbs-1] - ailen[mbs-1]; 607 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 608 } 609 /* reset ilen and imax for each row */ 610 for ( i=0; i<mbs; i++ ) { 611 ailen[i] = imax[i] = ai[i+1] - ai[i]; 612 } 613 a->nz = ai[mbs]; 614 615 /* diagonals may have moved, so kill the diagonal pointers */ 616 if (fshift && a->diag) { 617 PetscFree(a->diag); 618 PLogObjectMemory(A,-(m+1)*sizeof(int)); 619 a->diag = 0; 620 } 621 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Unneed storage space (blocks) %d used %d, rows %d, block size %d\n", fshift*bs2,a->nz*bs2,m,a->bs); 622 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ: Number of mallocs during MatSetValues %d\n", 623 a->reallocs); 624 return 0; 625 } 626 627 static int MatZeroEntries_SeqBAIJ(Mat A) 628 { 629 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 630 PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar)); 631 return 0; 632 } 633 634 int MatDestroy_SeqBAIJ(PetscObject obj) 635 { 636 Mat A = (Mat) obj; 637 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 638 639 #if defined(PETSC_LOG) 640 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 641 #endif 642 PetscFree(a->a); 643 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 644 if (a->diag) PetscFree(a->diag); 645 if (a->ilen) PetscFree(a->ilen); 646 if (a->imax) PetscFree(a->imax); 647 if (a->solve_work) PetscFree(a->solve_work); 648 if (a->mult_work) PetscFree(a->mult_work); 649 PetscFree(a); 650 PLogObjectDestroy(A); 651 PetscHeaderDestroy(A); 652 return 0; 653 } 654 655 static int MatSetOption_SeqBAIJ(Mat A,MatOption op) 656 { 657 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 658 if (op == MAT_ROW_ORIENTED) a->roworiented = 1; 659 else if (op == MAT_COLUMN_ORIENTED) a->roworiented = 0; 660 else if (op == MAT_COLUMNS_SORTED) a->sorted = 1; 661 else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 662 else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 663 else if (op == MAT_ROWS_SORTED || 664 op == MAT_SYMMETRIC || 665 op == MAT_STRUCTURALLY_SYMMETRIC || 666 op == MAT_YES_NEW_DIAGONALS) 667 PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 668 else if (op == MAT_NO_NEW_DIAGONALS) 669 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:MAT_NO_NEW_DIAGONALS");} 670 else 671 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");} 672 return 0; 673 } 674 675 676 /* -------------------------------------------------------*/ 677 /* Should check that shapes of vectors and matrices match */ 678 /* -------------------------------------------------------*/ 679 #include "pinclude/plapack.h" 680 681 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz) 682 { 683 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 684 Scalar *xg,*zg; 685 register Scalar *x,*z,*v,sum; 686 int mbs=a->mbs,i,*idx,*ii,n,ierr; 687 688 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 689 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 690 691 idx = a->j; 692 v = a->a; 693 ii = a->i; 694 695 for ( i=0; i<mbs; i++ ) { 696 n = ii[1] - ii[0]; ii++; 697 sum = 0.0; 698 while (n--) sum += *v++ * x[*idx++]; 699 z[i] = sum; 700 } 701 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 702 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 703 PLogFlops(2*a->nz - a->m); 704 return 0; 705 } 706 707 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz) 708 { 709 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 710 Scalar *xg,*zg; 711 register Scalar *x,*z,*v,*xb,sum1,sum2; 712 register Scalar x1,x2; 713 int mbs=a->mbs,i,*idx,*ii; 714 int j,n,ierr; 715 716 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 717 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 718 719 idx = a->j; 720 v = a->a; 721 ii = a->i; 722 723 for ( i=0; i<mbs; i++ ) { 724 n = ii[1] - ii[0]; ii++; 725 sum1 = 0.0; sum2 = 0.0; 726 for ( j=0; j<n; j++ ) { 727 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 728 sum1 += v[0]*x1 + v[2]*x2; 729 sum2 += v[1]*x1 + v[3]*x2; 730 v += 4; 731 } 732 z[0] = sum1; z[1] = sum2; 733 z += 2; 734 } 735 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 736 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 737 PLogFlops(4*a->nz - a->m); 738 return 0; 739 } 740 741 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz) 742 { 743 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 744 Scalar *xg,*zg; 745 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 746 int mbs=a->mbs,i,*idx,*ii,j,n,ierr; 747 748 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 749 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 750 751 idx = a->j; 752 v = a->a; 753 ii = a->i; 754 755 for ( i=0; i<mbs; i++ ) { 756 n = ii[1] - ii[0]; ii++; 757 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 758 for ( j=0; j<n; j++ ) { 759 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 760 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 761 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 762 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 763 v += 9; 764 } 765 z[0] = sum1; z[1] = sum2; z[2] = sum3; 766 z += 3; 767 } 768 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 769 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 770 PLogFlops(18*a->nz - a->m); 771 return 0; 772 } 773 774 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz) 775 { 776 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 777 Scalar *xg,*zg; 778 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4; 779 register Scalar x1,x2,x3,x4; 780 int mbs=a->mbs,i,*idx,*ii; 781 int j,n,ierr; 782 783 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 784 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 785 786 idx = a->j; 787 v = a->a; 788 ii = a->i; 789 790 for ( i=0; i<mbs; i++ ) { 791 n = ii[1] - ii[0]; ii++; 792 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 793 for ( j=0; j<n; j++ ) { 794 xb = x + 4*(*idx++); 795 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 796 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 797 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 798 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 799 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 800 v += 16; 801 } 802 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 803 z += 4; 804 } 805 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 806 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 807 PLogFlops(32*a->nz - a->m); 808 return 0; 809 } 810 811 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz) 812 { 813 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 814 Scalar *xg,*zg; 815 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 816 register Scalar x1,x2,x3,x4,x5; 817 int mbs=a->mbs,i,*idx,*ii,j,n,ierr; 818 819 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 820 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 821 822 idx = a->j; 823 v = a->a; 824 ii = a->i; 825 826 for ( i=0; i<mbs; i++ ) { 827 n = ii[1] - ii[0]; ii++; 828 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 829 for ( j=0; j<n; j++ ) { 830 xb = x + 5*(*idx++); 831 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 832 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 833 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 834 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 835 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 836 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 837 v += 25; 838 } 839 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 840 z += 5; 841 } 842 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 843 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 844 PLogFlops(50*a->nz - a->m); 845 return 0; 846 } 847 848 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz) 849 { 850 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 851 Scalar *xg,*zg; 852 register Scalar *x,*z,*v,*xb; 853 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr; 854 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt, _DZero = 0.0; 855 856 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 857 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 858 859 idx = a->j; 860 v = a->a; 861 ii = a->i; 862 863 864 if (!a->mult_work) { 865 k = PetscMax(a->m,a->n); 866 a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work); 867 } 868 work = a->mult_work; 869 for ( i=0; i<mbs; i++ ) { 870 n = ii[1] - ii[0]; ii++; 871 ncols = n*bs; 872 workt = work; 873 for ( j=0; j<n; j++ ) { 874 xb = x + bs*(*idx++); 875 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 876 workt += bs; 877 } 878 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); 879 v += n*bs2; 880 z += bs; 881 } 882 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 883 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 884 PLogFlops(2*a->nz*bs2 - a->m); 885 return 0; 886 } 887 888 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 889 { 890 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 891 Scalar *xg,*yg,*zg; 892 register Scalar *x,*y,*z,*v,sum; 893 int mbs=a->mbs,i,*idx,*ii,n,ierr; 894 895 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 896 ierr = VecGetArray(yy,&yg); CHKERRQ(ierr); y = yg; 897 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 898 899 idx = a->j; 900 v = a->a; 901 ii = a->i; 902 903 for ( i=0; i<mbs; i++ ) { 904 n = ii[1] - ii[0]; ii++; 905 sum = y[i]; 906 while (n--) sum += *v++ * x[*idx++]; 907 z[i] = sum; 908 } 909 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 910 ierr = VecRestoreArray(yy,&yg); CHKERRQ(ierr); 911 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 912 PLogFlops(2*a->nz); 913 return 0; 914 } 915 916 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 917 { 918 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 919 Scalar *xg,*yg,*zg; 920 register Scalar *x,*y,*z,*v,*xb,sum1,sum2; 921 register Scalar x1,x2; 922 int mbs=a->mbs,i,*idx,*ii; 923 int j,n,ierr; 924 925 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 926 ierr = VecGetArray(yy,&yg); CHKERRQ(ierr); y = yg; 927 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 928 929 idx = a->j; 930 v = a->a; 931 ii = a->i; 932 933 for ( i=0; i<mbs; i++ ) { 934 n = ii[1] - ii[0]; ii++; 935 sum1 = y[0]; sum2 = y[1]; 936 for ( j=0; j<n; j++ ) { 937 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 938 sum1 += v[0]*x1 + v[2]*x2; 939 sum2 += v[1]*x1 + v[3]*x2; 940 v += 4; 941 } 942 z[0] = sum1; z[1] = sum2; 943 z += 2; y += 2; 944 } 945 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 946 ierr = VecRestoreArray(yy,&yg); CHKERRQ(ierr); 947 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 948 PLogFlops(4*a->nz); 949 return 0; 950 } 951 952 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 953 { 954 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 955 Scalar *xg,*yg,*zg; 956 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 957 int mbs=a->mbs,i,*idx,*ii,j,n,ierr; 958 959 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 960 ierr = VecGetArray(yy,&yg); CHKERRQ(ierr); y = yg; 961 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 962 963 idx = a->j; 964 v = a->a; 965 ii = a->i; 966 967 for ( i=0; i<mbs; i++ ) { 968 n = ii[1] - ii[0]; ii++; 969 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; 970 for ( j=0; j<n; j++ ) { 971 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 972 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 973 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 974 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 975 v += 9; 976 } 977 z[0] = sum1; z[1] = sum2; z[2] = sum3; 978 z += 3; y += 3; 979 } 980 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 981 ierr = VecRestoreArray(yy,&yg); CHKERRQ(ierr); 982 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 983 PLogFlops(18*a->nz); 984 return 0; 985 } 986 987 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 988 { 989 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 990 Scalar *xg,*yg,*zg; 991 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4; 992 register Scalar x1,x2,x3,x4; 993 int mbs=a->mbs,i,*idx,*ii; 994 int j,n,ierr; 995 996 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 997 ierr = VecGetArray(yy,&yg); CHKERRQ(ierr); y = yg; 998 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 999 1000 idx = a->j; 1001 v = a->a; 1002 ii = a->i; 1003 1004 for ( i=0; i<mbs; i++ ) { 1005 n = ii[1] - ii[0]; ii++; 1006 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; 1007 for ( j=0; j<n; j++ ) { 1008 xb = x + 4*(*idx++); 1009 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1010 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 1011 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 1012 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 1013 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 1014 v += 16; 1015 } 1016 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 1017 z += 4; y += 4; 1018 } 1019 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1020 ierr = VecRestoreArray(yy,&yg); CHKERRQ(ierr); 1021 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1022 PLogFlops(32*a->nz); 1023 return 0; 1024 } 1025 1026 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 1027 { 1028 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1029 Scalar *xg,*yg,*zg; 1030 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 1031 register Scalar x1,x2,x3,x4,x5; 1032 int mbs=a->mbs,i,*idx,*ii,j,n,ierr; 1033 1034 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 1035 ierr = VecGetArray(yy,&yg); CHKERRQ(ierr); y = yg; 1036 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 1037 1038 idx = a->j; 1039 v = a->a; 1040 ii = a->i; 1041 1042 for ( i=0; i<mbs; i++ ) { 1043 n = ii[1] - ii[0]; ii++; 1044 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; 1045 for ( j=0; j<n; j++ ) { 1046 xb = x + 5*(*idx++); 1047 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 1048 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1049 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1050 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1051 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1052 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 1053 v += 25; 1054 } 1055 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 1056 z += 5; y += 5; 1057 } 1058 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1059 ierr = VecRestoreArray(yy,&yg); CHKERRQ(ierr); 1060 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1061 PLogFlops(50*a->nz); 1062 return 0; 1063 } 1064 1065 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 1066 { 1067 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1068 Scalar *xg,*zg; 1069 register Scalar *x,*z,*v,*xb; 1070 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr; 1071 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1072 1073 if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1074 1075 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 1076 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 1077 1078 idx = a->j; 1079 v = a->a; 1080 ii = a->i; 1081 1082 1083 if (!a->mult_work) { 1084 k = PetscMax(a->m,a->n); 1085 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work); 1086 } 1087 work = a->mult_work; 1088 for ( i=0; i<mbs; i++ ) { 1089 n = ii[1] - ii[0]; ii++; 1090 ncols = n*bs; 1091 workt = work; 1092 for ( j=0; j<n; j++ ) { 1093 xb = x + bs*(*idx++); 1094 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1095 workt += bs; 1096 } 1097 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); 1098 v += n*bs2; 1099 z += bs; 1100 } 1101 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1102 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1103 PLogFlops(2*a->nz*bs2 ); 1104 return 0; 1105 } 1106 1107 static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz) 1108 { 1109 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1110 Scalar *xg,*zg,*zb; 1111 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1112 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1113 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1114 1115 1116 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 1117 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 1118 PetscMemzero(z,N*sizeof(Scalar)); 1119 1120 idx = a->j; 1121 v = a->a; 1122 ii = a->i; 1123 1124 switch (bs) { 1125 case 1: 1126 for ( i=0; i<mbs; i++ ) { 1127 n = ii[1] - ii[0]; ii++; 1128 xb = x + i; x1 = xb[0]; 1129 ib = idx + ai[i]; 1130 for ( j=0; j<n; j++ ) { 1131 rval = ib[j]; 1132 z[rval] += *v++ * x1; 1133 } 1134 } 1135 break; 1136 case 2: 1137 for ( i=0; i<mbs; i++ ) { 1138 n = ii[1] - ii[0]; ii++; 1139 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1140 ib = idx + ai[i]; 1141 for ( j=0; j<n; j++ ) { 1142 rval = ib[j]*2; 1143 z[rval++] += v[0]*x1 + v[1]*x2; 1144 z[rval++] += v[2]*x1 + v[3]*x2; 1145 v += 4; 1146 } 1147 } 1148 break; 1149 case 3: 1150 for ( i=0; i<mbs; i++ ) { 1151 n = ii[1] - ii[0]; ii++; 1152 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1153 ib = idx + ai[i]; 1154 for ( j=0; j<n; j++ ) { 1155 rval = ib[j]*3; 1156 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1157 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1158 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1159 v += 9; 1160 } 1161 } 1162 break; 1163 case 4: 1164 for ( i=0; i<mbs; i++ ) { 1165 n = ii[1] - ii[0]; ii++; 1166 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1167 ib = idx + ai[i]; 1168 for ( j=0; j<n; j++ ) { 1169 rval = ib[j]*4; 1170 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1171 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1172 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1173 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1174 v += 16; 1175 } 1176 } 1177 break; 1178 case 5: 1179 for ( i=0; i<mbs; i++ ) { 1180 n = ii[1] - ii[0]; ii++; 1181 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1182 x4 = xb[3]; x5 = xb[4]; 1183 ib = idx + ai[i]; 1184 for ( j=0; j<n; j++ ) { 1185 rval = ib[j]*5; 1186 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1187 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1188 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1189 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1190 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1191 v += 25; 1192 } 1193 } 1194 break; 1195 /* block sizes larger then 5 by 5 are handled by BLAS */ 1196 default: { 1197 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1198 if (!a->mult_work) { 1199 k = PetscMax(a->m,a->n); 1200 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1201 CHKPTRQ(a->mult_work); 1202 } 1203 work = a->mult_work; 1204 for ( i=0; i<mbs; i++ ) { 1205 n = ii[1] - ii[0]; ii++; 1206 ncols = n*bs; 1207 PetscMemzero(work,ncols*sizeof(Scalar)); 1208 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1209 v += n*bs2; 1210 x += bs; 1211 workt = work; 1212 for ( j=0; j<n; j++ ) { 1213 zb = z + bs*(*idx++); 1214 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1215 workt += bs; 1216 } 1217 } 1218 } 1219 } 1220 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1221 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1222 PLogFlops(2*a->nz*a->bs2 - a->n); 1223 return 0; 1224 } 1225 static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1226 { 1227 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1228 Scalar *xg,*zg,*zb; 1229 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1230 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1231 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1232 1233 1234 1235 ierr = VecGetArray(xx,&xg); CHKERRQ(ierr); x = xg; 1236 ierr = VecGetArray(zz,&zg); CHKERRQ(ierr); z = zg; 1237 1238 if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1239 else PetscMemzero(z,N*sizeof(Scalar)); 1240 1241 1242 idx = a->j; 1243 v = a->a; 1244 ii = a->i; 1245 1246 switch (bs) { 1247 case 1: 1248 for ( i=0; i<mbs; i++ ) { 1249 n = ii[1] - ii[0]; ii++; 1250 xb = x + i; x1 = xb[0]; 1251 ib = idx + ai[i]; 1252 for ( j=0; j<n; j++ ) { 1253 rval = ib[j]; 1254 z[rval] += *v++ * x1; 1255 } 1256 } 1257 break; 1258 case 2: 1259 for ( i=0; i<mbs; i++ ) { 1260 n = ii[1] - ii[0]; ii++; 1261 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1262 ib = idx + ai[i]; 1263 for ( j=0; j<n; j++ ) { 1264 rval = ib[j]*2; 1265 z[rval++] += v[0]*x1 + v[1]*x2; 1266 z[rval++] += v[2]*x1 + v[3]*x2; 1267 v += 4; 1268 } 1269 } 1270 break; 1271 case 3: 1272 for ( i=0; i<mbs; i++ ) { 1273 n = ii[1] - ii[0]; ii++; 1274 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1275 ib = idx + ai[i]; 1276 for ( j=0; j<n; j++ ) { 1277 rval = ib[j]*3; 1278 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1279 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1280 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1281 v += 9; 1282 } 1283 } 1284 break; 1285 case 4: 1286 for ( i=0; i<mbs; i++ ) { 1287 n = ii[1] - ii[0]; ii++; 1288 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1289 ib = idx + ai[i]; 1290 for ( j=0; j<n; j++ ) { 1291 rval = ib[j]*4; 1292 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1293 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1294 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1295 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1296 v += 16; 1297 } 1298 } 1299 break; 1300 case 5: 1301 for ( i=0; i<mbs; i++ ) { 1302 n = ii[1] - ii[0]; ii++; 1303 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1304 x4 = xb[3]; x5 = xb[4]; 1305 ib = idx + ai[i]; 1306 for ( j=0; j<n; j++ ) { 1307 rval = ib[j]*5; 1308 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1309 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1310 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1311 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1312 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1313 v += 25; 1314 } 1315 } 1316 break; 1317 /* block sizes larger then 5 by 5 are handled by BLAS */ 1318 default: { 1319 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1320 if (!a->mult_work) { 1321 k = PetscMax(a->m,a->n); 1322 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1323 CHKPTRQ(a->mult_work); 1324 } 1325 work = a->mult_work; 1326 for ( i=0; i<mbs; i++ ) { 1327 n = ii[1] - ii[0]; ii++; 1328 ncols = n*bs; 1329 PetscMemzero(work,ncols*sizeof(Scalar)); 1330 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1331 v += n*bs2; 1332 x += bs; 1333 workt = work; 1334 for ( j=0; j<n; j++ ) { 1335 zb = z + bs*(*idx++); 1336 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1337 workt += bs; 1338 } 1339 } 1340 } 1341 } 1342 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1343 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1344 PLogFlops(2*a->nz*a->bs2); 1345 return 0; 1346 } 1347 1348 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,int *nz,int *nza,int *mem) 1349 { 1350 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1351 if (nz) *nz = a->bs2*a->nz; 1352 if (nza) *nza = a->maxnz; 1353 if (mem) *mem = (int)A->mem; 1354 return 0; 1355 } 1356 1357 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 1358 { 1359 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 1360 1361 if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type"); 1362 1363 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1364 if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 1365 (a->nz != b->nz)) { 1366 *flg = PETSC_FALSE; return 0; 1367 } 1368 1369 /* if the a->i are the same */ 1370 if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 1371 *flg = PETSC_FALSE; return 0; 1372 } 1373 1374 /* if a->j are the same */ 1375 if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 1376 *flg = PETSC_FALSE; return 0; 1377 } 1378 1379 /* if a->a are the same */ 1380 if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 1381 *flg = PETSC_FALSE; return 0; 1382 } 1383 *flg = PETSC_TRUE; 1384 return 0; 1385 1386 } 1387 1388 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 1389 { 1390 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1391 int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 1392 Scalar *x, zero = 0.0,*aa,*aa_j; 1393 1394 bs = a->bs; 1395 aa = a->a; 1396 ai = a->i; 1397 aj = a->j; 1398 ambs = a->mbs; 1399 bs2 = a->bs2; 1400 1401 VecSet(&zero,v); 1402 VecGetArray(v,&x); VecGetLocalSize(v,&n); 1403 if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector"); 1404 for ( i=0; i<ambs; i++ ) { 1405 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1406 if (aj[j] == i) { 1407 row = i*bs; 1408 aa_j = aa+j*bs2; 1409 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 1410 break; 1411 } 1412 } 1413 } 1414 return 0; 1415 } 1416 1417 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 1418 { 1419 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1420 Scalar *l,*r,x,*v,*aa,*li,*ri; 1421 int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 1422 1423 ai = a->i; 1424 aj = a->j; 1425 aa = a->a; 1426 m = a->m; 1427 n = a->n; 1428 bs = a->bs; 1429 mbs = a->mbs; 1430 bs2 = a->bs2; 1431 if (ll) { 1432 VecGetArray(ll,&l); VecGetSize(ll,&lm); 1433 if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length"); 1434 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1435 M = ai[i+1] - ai[i]; 1436 li = l + i*bs; 1437 v = aa + bs2*ai[i]; 1438 for ( j=0; j<M; j++ ) { /* for each block */ 1439 for ( k=0; k<bs2; k++ ) { 1440 (*v++) *= li[k%bs]; 1441 } 1442 } 1443 } 1444 } 1445 1446 if (rr) { 1447 VecGetArray(rr,&r); VecGetSize(rr,&rn); 1448 if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length"); 1449 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1450 M = ai[i+1] - ai[i]; 1451 v = aa + bs2*ai[i]; 1452 for ( j=0; j<M; j++ ) { /* for each block */ 1453 ri = r + bs*aj[ai[i]+j]; 1454 for ( k=0; k<bs; k++ ) { 1455 x = ri[k]; 1456 for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 1457 } 1458 } 1459 } 1460 } 1461 return 0; 1462 } 1463 1464 1465 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 1466 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 1467 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1468 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*); 1469 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**); 1470 1471 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 1472 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 1473 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 1474 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 1475 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 1476 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 1477 1478 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 1479 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 1480 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 1481 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 1482 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 1483 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 1484 1485 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 1486 { 1487 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1488 Scalar *v = a->a; 1489 double sum = 0.0; 1490 int i,nz=a->nz,bs2=a->bs2; 1491 1492 if (type == NORM_FROBENIUS) { 1493 for (i=0; i< bs2*nz; i++ ) { 1494 #if defined(PETSC_COMPLEX) 1495 sum += real(conj(*v)*(*v)); v++; 1496 #else 1497 sum += (*v)*(*v); v++; 1498 #endif 1499 } 1500 *norm = sqrt(sum); 1501 } 1502 else { 1503 SETERRQ(1,"MatNorm_SeqBAIJ:No support for this norm yet"); 1504 } 1505 return 0; 1506 } 1507 1508 /* 1509 note: This can only work for identity for row and col. It would 1510 be good to check this and otherwise generate an error. 1511 */ 1512 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 1513 { 1514 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1515 Mat outA; 1516 int ierr; 1517 1518 if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported"); 1519 1520 outA = inA; 1521 inA->factor = FACTOR_LU; 1522 a->row = row; 1523 a->col = col; 1524 1525 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1526 1527 if (!a->diag) { 1528 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 1529 } 1530 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 1531 return 0; 1532 } 1533 1534 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 1535 { 1536 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1537 int one = 1, totalnz = a->bs2*a->nz; 1538 BLscal_( &totalnz, alpha, a->a, &one ); 1539 PLogFlops(totalnz); 1540 return 0; 1541 } 1542 1543 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 1544 { 1545 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1546 int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 1547 int *ai = a->i, *ailen = a->ilen; 1548 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 1549 Scalar *ap, *aa = a->a, zero = 0.0; 1550 1551 for ( k=0; k<m; k++ ) { /* loop over rows */ 1552 row = im[k]; brow = row/bs; 1553 if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row"); 1554 if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large"); 1555 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1556 nrow = ailen[brow]; 1557 for ( l=0; l<n; l++ ) { /* loop over columns */ 1558 if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column"); 1559 if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large"); 1560 col = in[l] ; 1561 bcol = col/bs; 1562 cidx = col%bs; 1563 ridx = row%bs; 1564 high = nrow; 1565 low = 0; /* assume unsorted */ 1566 while (high-low > 5) { 1567 t = (low+high)/2; 1568 if (rp[t] > bcol) high = t; 1569 else low = t; 1570 } 1571 for ( i=low; i<high; i++ ) { 1572 if (rp[i] > bcol) break; 1573 if (rp[i] == bcol) { 1574 *v++ = ap[bs2*i+bs*cidx+ridx]; 1575 goto finished; 1576 } 1577 } 1578 *v++ = zero; 1579 finished:; 1580 } 1581 } 1582 return 0; 1583 } 1584 1585 static int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs) 1586 { 1587 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data; 1588 *bs = baij->bs; 1589 return 0; 1590 } 1591 1592 /* idx should be of length atlease bs */ 1593 static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg) 1594 { 1595 int i,row; 1596 row = idx[0]; 1597 if (row%bs!=0) { *flg = PETSC_FALSE; return 0; } 1598 1599 for ( i=1; i<bs; i++ ) { 1600 if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; } 1601 } 1602 *flg = PETSC_TRUE; 1603 return 0; 1604 } 1605 1606 static int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag) 1607 { 1608 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1609 IS is_local; 1610 int ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2; 1611 PetscTruth flg; 1612 Scalar zero = 0.0,*aa; 1613 1614 /* Make a copy of the IS and sort it */ 1615 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 1616 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1617 ierr = ISCreateSeq(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr); 1618 ierr = ISSort(is_local); CHKERRQ(ierr); 1619 ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr); 1620 1621 i = 0; 1622 while (i < is_n) { 1623 if (rows[i]<0 || rows[i]>m) SETERRQ(1,"MatZeroRows_SeqBAIJ:row out of range"); 1624 flg = PETSC_FALSE; 1625 if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); } 1626 if (flg) { /* There exists a block of rows to be Zerowed */ 1627 baij->ilen[rows[i]/bs] = 0; 1628 i += bs; 1629 } else { /* Zero out only the requested row */ 1630 count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs; 1631 aa = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs); 1632 for ( j=0; j<count; j++ ) { 1633 aa[0] = zero; 1634 aa+=bs; 1635 } 1636 i++; 1637 } 1638 } 1639 if (diag) { 1640 for ( j=0; j<is_n; j++ ) { 1641 ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1642 } 1643 } 1644 ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr); 1645 ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr); 1646 ierr = ISDestroy(is_local); CHKERRQ(ierr); 1647 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1648 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1649 1650 return 0; 1651 } 1652 int MatPrintHelp_SeqBAIJ(Mat A) 1653 { 1654 static int called = 0; 1655 MPI_Comm comm = A->comm; 1656 1657 if (called) return 0; else called = 1; 1658 PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n"); 1659 PetscPrintf(comm," -mat_block_size <block_size>\n"); 1660 return 0; 1661 } 1662 1663 /* -------------------------------------------------------------------*/ 1664 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 1665 MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 1666 MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N, 1667 MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ, 1668 MatSolve_SeqBAIJ_N,0, 1669 0,0, 1670 MatLUFactor_SeqBAIJ,0, 1671 0, 1672 MatTranspose_SeqBAIJ, 1673 MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 1674 MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 1675 0,MatAssemblyEnd_SeqBAIJ, 1676 0, 1677 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ, 1678 MatGetReordering_SeqBAIJ, 1679 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 1680 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 1681 MatILUFactorSymbolic_SeqBAIJ,0, 1682 0,0,/* MatConvert_SeqBAIJ */ 0, 1683 MatGetSubMatrix_SeqBAIJ,0, 1684 MatConvertSameType_SeqBAIJ,0,0, 1685 MatILUFactor_SeqBAIJ,0,0, 1686 MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ, 1687 MatGetValues_SeqBAIJ,0, 1688 MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 1689 0,0,0,MatGetBlockSize_SeqBAIJ}; 1690 1691 /*@C 1692 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1693 compressed row) format. For good matrix assembly performance the 1694 user should preallocate the matrix storage by setting the parameter nz 1695 (or the array nzz). By setting these parameters accurately, performance 1696 during matrix assembly can be increased by more than a factor of 50. 1697 1698 Input Parameters: 1699 . comm - MPI communicator, set to MPI_COMM_SELF 1700 . bs - size of block 1701 . m - number of rows 1702 . n - number of columns 1703 . nz - number of block nonzeros per block row (same for all rows) 1704 . nzz - array containing the number of block nonzeros in the various block rows 1705 (possibly different for each block row) or PETSC_NULL 1706 1707 Output Parameter: 1708 . A - the matrix 1709 1710 Notes: 1711 The block AIJ format is fully compatible with standard Fortran 77 1712 storage. That is, the stored row and column indices can begin at 1713 either one (as in Fortran) or zero. See the users' manual for details. 1714 1715 Specify the preallocated storage with either nz or nnz (not both). 1716 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1717 allocation. For additional details, see the users manual chapter on 1718 matrices and the file $(PETSC_DIR)/Performance. 1719 1720 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1721 @*/ 1722 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 1723 { 1724 Mat B; 1725 Mat_SeqBAIJ *b; 1726 int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs; 1727 1728 if (mbs*bs!=m || nbs*bs!=n) 1729 SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize"); 1730 1731 *A = 0; 1732 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 1733 PLogObjectCreate(B); 1734 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 1735 PetscMemzero(b,sizeof(Mat_SeqBAIJ)); 1736 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 1737 ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr); 1738 if (!flg) { 1739 switch (bs) { 1740 case 1: 1741 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1742 B->ops.solve = MatSolve_SeqBAIJ_1; 1743 B->ops.mult = MatMult_SeqBAIJ_1; 1744 B->ops.multadd = MatMultAdd_SeqBAIJ_1; 1745 break; 1746 case 2: 1747 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1748 B->ops.solve = MatSolve_SeqBAIJ_2; 1749 B->ops.mult = MatMult_SeqBAIJ_2; 1750 B->ops.multadd = MatMultAdd_SeqBAIJ_2; 1751 break; 1752 case 3: 1753 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1754 B->ops.solve = MatSolve_SeqBAIJ_3; 1755 B->ops.mult = MatMult_SeqBAIJ_3; 1756 B->ops.multadd = MatMultAdd_SeqBAIJ_3; 1757 break; 1758 case 4: 1759 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1760 B->ops.solve = MatSolve_SeqBAIJ_4; 1761 B->ops.mult = MatMult_SeqBAIJ_4; 1762 B->ops.multadd = MatMultAdd_SeqBAIJ_4; 1763 break; 1764 case 5: 1765 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1766 B->ops.solve = MatSolve_SeqBAIJ_5; 1767 B->ops.mult = MatMult_SeqBAIJ_5; 1768 B->ops.multadd = MatMultAdd_SeqBAIJ_5; 1769 break; 1770 } 1771 } 1772 B->destroy = MatDestroy_SeqBAIJ; 1773 B->view = MatView_SeqBAIJ; 1774 B->factor = 0; 1775 B->lupivotthreshold = 1.0; 1776 b->row = 0; 1777 b->col = 0; 1778 b->reallocs = 0; 1779 1780 b->m = m; B->m = m; B->M = m; 1781 b->n = n; B->n = n; B->N = n; 1782 b->mbs = mbs; 1783 b->nbs = nbs; 1784 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 1785 if (nnz == PETSC_NULL) { 1786 if (nz == PETSC_DEFAULT) nz = 5; 1787 else if (nz <= 0) nz = 1; 1788 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 1789 nz = nz*mbs; 1790 } 1791 else { 1792 nz = 0; 1793 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 1794 } 1795 1796 /* allocate the matrix space */ 1797 len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 1798 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 1799 PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 1800 b->j = (int *) (b->a + nz*bs2); 1801 PetscMemzero(b->j,nz*sizeof(int)); 1802 b->i = b->j + nz; 1803 b->singlemalloc = 1; 1804 1805 b->i[0] = 0; 1806 for (i=1; i<mbs+1; i++) { 1807 b->i[i] = b->i[i-1] + b->imax[i-1]; 1808 } 1809 1810 /* b->ilen will count nonzeros in each block row so far. */ 1811 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 1812 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1813 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 1814 1815 b->bs = bs; 1816 b->bs2 = bs2; 1817 b->mbs = mbs; 1818 b->nz = 0; 1819 b->maxnz = nz*bs2; 1820 b->sorted = 0; 1821 b->roworiented = 1; 1822 b->nonew = 0; 1823 b->diag = 0; 1824 b->solve_work = 0; 1825 b->mult_work = 0; 1826 b->spptr = 0; 1827 *A = B; 1828 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 1829 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 1830 return 0; 1831 } 1832 1833 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 1834 { 1835 Mat C; 1836 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 1837 int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1838 1839 if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix"); 1840 1841 *B = 0; 1842 PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 1843 PLogObjectCreate(C); 1844 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 1845 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 1846 C->destroy = MatDestroy_SeqBAIJ; 1847 C->view = MatView_SeqBAIJ; 1848 C->factor = A->factor; 1849 c->row = 0; 1850 c->col = 0; 1851 C->assembled = PETSC_TRUE; 1852 1853 c->m = C->m = a->m; 1854 c->n = C->n = a->n; 1855 C->M = a->m; 1856 C->N = a->n; 1857 1858 c->bs = a->bs; 1859 c->bs2 = a->bs2; 1860 c->mbs = a->mbs; 1861 c->nbs = a->nbs; 1862 1863 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 1864 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 1865 for ( i=0; i<mbs; i++ ) { 1866 c->imax[i] = a->imax[i]; 1867 c->ilen[i] = a->ilen[i]; 1868 } 1869 1870 /* allocate the matrix space */ 1871 c->singlemalloc = 1; 1872 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 1873 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 1874 c->j = (int *) (c->a + nz*bs2); 1875 c->i = c->j + nz; 1876 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 1877 if (mbs > 0) { 1878 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 1879 if (cpvalues == COPY_VALUES) { 1880 PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 1881 } 1882 } 1883 1884 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1885 c->sorted = a->sorted; 1886 c->roworiented = a->roworiented; 1887 c->nonew = a->nonew; 1888 1889 if (a->diag) { 1890 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 1891 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 1892 for ( i=0; i<mbs; i++ ) { 1893 c->diag[i] = a->diag[i]; 1894 } 1895 } 1896 else c->diag = 0; 1897 c->nz = a->nz; 1898 c->maxnz = a->maxnz; 1899 c->solve_work = 0; 1900 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1901 c->mult_work = 0; 1902 *B = C; 1903 return 0; 1904 } 1905 1906 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 1907 { 1908 Mat_SeqBAIJ *a; 1909 Mat B; 1910 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 1911 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1912 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1913 int *masked, nmask,tmp,bs2,ishift; 1914 Scalar *aa; 1915 MPI_Comm comm = ((PetscObject) viewer)->comm; 1916 1917 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 1918 bs2 = bs*bs; 1919 1920 MPI_Comm_size(comm,&size); 1921 if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor"); 1922 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 1923 ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 1924 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object"); 1925 M = header[1]; N = header[2]; nz = header[3]; 1926 1927 if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 1928 1929 /* 1930 This code adds extra rows to make sure the number of rows is 1931 divisible by the blocksize 1932 */ 1933 mbs = M/bs; 1934 extra_rows = bs - M + bs*(mbs); 1935 if (extra_rows == bs) extra_rows = 0; 1936 else mbs++; 1937 if (extra_rows) { 1938 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize"); 1939 } 1940 1941 /* read in row lengths */ 1942 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 1943 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 1944 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 1945 1946 /* read in column indices */ 1947 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 1948 ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 1949 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 1950 1951 /* loop over row lengths determining block row lengths */ 1952 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 1953 PetscMemzero(browlengths,mbs*sizeof(int)); 1954 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 1955 PetscMemzero(mask,mbs*sizeof(int)); 1956 masked = mask + mbs; 1957 rowcount = 0; nzcount = 0; 1958 for ( i=0; i<mbs; i++ ) { 1959 nmask = 0; 1960 for ( j=0; j<bs; j++ ) { 1961 kmax = rowlengths[rowcount]; 1962 for ( k=0; k<kmax; k++ ) { 1963 tmp = jj[nzcount++]/bs; 1964 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1965 } 1966 rowcount++; 1967 } 1968 browlengths[i] += nmask; 1969 /* zero out the mask elements we set */ 1970 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 1971 } 1972 1973 /* create our matrix */ 1974 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 1975 CHKERRQ(ierr); 1976 B = *A; 1977 a = (Mat_SeqBAIJ *) B->data; 1978 1979 /* set matrix "i" values */ 1980 a->i[0] = 0; 1981 for ( i=1; i<= mbs; i++ ) { 1982 a->i[i] = a->i[i-1] + browlengths[i-1]; 1983 a->ilen[i-1] = browlengths[i-1]; 1984 } 1985 a->nz = 0; 1986 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 1987 1988 /* read in nonzero values */ 1989 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 1990 ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 1991 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 1992 1993 /* set "a" and "j" values into matrix */ 1994 nzcount = 0; jcount = 0; 1995 for ( i=0; i<mbs; i++ ) { 1996 nzcountb = nzcount; 1997 nmask = 0; 1998 for ( j=0; j<bs; j++ ) { 1999 kmax = rowlengths[i*bs+j]; 2000 for ( k=0; k<kmax; k++ ) { 2001 tmp = jj[nzcount++]/bs; 2002 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2003 } 2004 rowcount++; 2005 } 2006 /* sort the masked values */ 2007 PetscSortInt(nmask,masked); 2008 2009 /* set "j" values into matrix */ 2010 maskcount = 1; 2011 for ( j=0; j<nmask; j++ ) { 2012 a->j[jcount++] = masked[j]; 2013 mask[masked[j]] = maskcount++; 2014 } 2015 /* set "a" values into matrix */ 2016 ishift = bs2*a->i[i]; 2017 for ( j=0; j<bs; j++ ) { 2018 kmax = rowlengths[i*bs+j]; 2019 for ( k=0; k<kmax; k++ ) { 2020 tmp = jj[nzcountb]/bs ; 2021 block = mask[tmp] - 1; 2022 point = jj[nzcountb] - bs*tmp; 2023 idx = ishift + bs2*block + j + bs*point; 2024 a->a[idx] = aa[nzcountb++]; 2025 } 2026 } 2027 /* zero out the mask elements we set */ 2028 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2029 } 2030 if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix"); 2031 2032 PetscFree(rowlengths); 2033 PetscFree(browlengths); 2034 PetscFree(aa); 2035 PetscFree(jj); 2036 PetscFree(mask); 2037 2038 B->assembled = PETSC_TRUE; 2039 2040 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 2041 if (flg) { 2042 Viewer tviewer; 2043 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2044 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO,0);CHKERRQ(ierr); 2045 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2046 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2047 } 2048 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 2049 if (flg) { 2050 Viewer tviewer; 2051 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2052 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_INFO_DETAILED,0);CHKERRQ(ierr); 2053 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2054 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2055 } 2056 ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 2057 if (flg) { 2058 Viewer tviewer; 2059 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2060 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2061 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2062 } 2063 ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 2064 if (flg) { 2065 Viewer tviewer; 2066 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2067 ierr = ViewerSetFormat(tviewer,ASCII_FORMAT_MATLAB,"M");CHKERRQ(ierr); 2068 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2069 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2070 } 2071 ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 2072 if (flg) { 2073 Viewer tviewer; 2074 ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 2075 ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 2076 ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 2077 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2078 } 2079 return 0; 2080 } 2081 2082 2083 2084