1 #ifndef lint 2 static char vcid[] = "$Id: baij.c,v 1.83 1997/01/22 18:43:37 bsmith Exp bsmith $"; 3 #endif 4 5 /* 6 Defines the basic matrix operations for the BAIJ (compressed row) 7 matrix storage format. 8 */ 9 #include "src/mat/impls/baij/seq/baij.h" 10 #include "src/vec/vecimpl.h" 11 #include "src/inline/spops.h" 12 #include "petsc.h" 13 14 15 /* 16 Adds diagonal pointers to sparse matrix structure. 17 */ 18 19 #undef __FUNC__ 20 #define __FUNC__ "MatMarkDiag_SeqBAIJ" 21 int MatMarkDiag_SeqBAIJ(Mat A) 22 { 23 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 24 int i,j, *diag, m = a->mbs; 25 26 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 27 PLogObjectMemory(A,(m+1)*sizeof(int)); 28 for ( i=0; i<m; i++ ) { 29 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 30 if (a->j[j] == i) { 31 diag[i] = j; 32 break; 33 } 34 } 35 } 36 a->diag = diag; 37 return 0; 38 } 39 40 #include "draw.h" 41 #include "pinclude/pviewer.h" 42 #include "sys.h" 43 44 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 45 46 #undef __FUNC__ 47 #define __FUNC__ "MatGetRowIJ_SeqBAIJ" 48 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 49 PetscTruth *done) 50 { 51 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 52 int ierr, n = a->mbs,i; 53 54 *nn = n; 55 if (!ia) return 0; 56 if (symmetric) { 57 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 58 } else if (oshift == 1) { 59 /* temporarily add 1 to i and j indices */ 60 int nz = a->i[n] + 1; 61 for ( i=0; i<nz; i++ ) a->j[i]++; 62 for ( i=0; i<n+1; i++ ) a->i[i]++; 63 *ia = a->i; *ja = a->j; 64 } else { 65 *ia = a->i; *ja = a->j; 66 } 67 68 return 0; 69 } 70 71 #undef __FUNC__ 72 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ" 73 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 74 PetscTruth *done) 75 { 76 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 77 int i,n = a->mbs; 78 79 if (!ia) return 0; 80 if (symmetric) { 81 PetscFree(*ia); 82 PetscFree(*ja); 83 } else if (oshift == 1) { 84 int nz = a->i[n]; 85 for ( i=0; i<nz; i++ ) a->j[i]--; 86 for ( i=0; i<n+1; i++ ) a->i[i]--; 87 } 88 return 0; 89 } 90 91 92 #undef __FUNC__ 93 #define __FUNC__ "MatView_SeqBAIJ_Binary" 94 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 95 { 96 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 97 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 98 Scalar *aa; 99 100 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 101 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 102 col_lens[0] = MAT_COOKIE; 103 104 col_lens[1] = a->m; 105 col_lens[2] = a->n; 106 col_lens[3] = a->nz*bs2; 107 108 /* store lengths of each row and write (including header) to file */ 109 count = 0; 110 for ( i=0; i<a->mbs; i++ ) { 111 for ( j=0; j<bs; j++ ) { 112 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 113 } 114 } 115 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 116 PetscFree(col_lens); 117 118 /* store column indices (zero start index) */ 119 jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj); 120 count = 0; 121 for ( i=0; i<a->mbs; i++ ) { 122 for ( j=0; j<bs; j++ ) { 123 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 124 for ( l=0; l<bs; l++ ) { 125 jj[count++] = bs*a->j[k] + l; 126 } 127 } 128 } 129 } 130 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr); 131 PetscFree(jj); 132 133 /* store nonzero values */ 134 aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa); 135 count = 0; 136 for ( i=0; i<a->mbs; i++ ) { 137 for ( j=0; j<bs; j++ ) { 138 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 139 for ( l=0; l<bs; l++ ) { 140 aa[count++] = a->a[bs2*k + l*bs + j]; 141 } 142 } 143 } 144 } 145 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 146 PetscFree(aa); 147 return 0; 148 } 149 150 #undef __FUNC__ 151 #define __FUNC__ "MatView_SeqBAIJ_ASCII" 152 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 153 { 154 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 155 int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 156 FILE *fd; 157 char *outputname; 158 159 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 160 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 161 ierr = ViewerGetFormat(viewer,&format); 162 if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) { 163 fprintf(fd," block size is %d\n",bs); 164 } 165 else if (format == VIEWER_FORMAT_ASCII_MATLAB) { 166 SETERRQ(1,0,"Matlab format not supported"); 167 } 168 else if (format == VIEWER_FORMAT_ASCII_COMMON) { 169 for ( i=0; i<a->mbs; i++ ) { 170 for ( j=0; j<bs; j++ ) { 171 fprintf(fd,"row %d:",i*bs+j); 172 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 173 for ( l=0; l<bs; l++ ) { 174 #if defined(PETSC_COMPLEX) 175 if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 176 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 177 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 178 else if (real(a->a[bs2*k + l*bs + j]) != 0.0) 179 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 180 #else 181 if (a->a[bs2*k + l*bs + j] != 0.0) 182 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 183 #endif 184 } 185 } 186 fprintf(fd,"\n"); 187 } 188 } 189 } 190 else { 191 for ( i=0; i<a->mbs; i++ ) { 192 for ( j=0; j<bs; j++ ) { 193 fprintf(fd,"row %d:",i*bs+j); 194 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 195 for ( l=0; l<bs; l++ ) { 196 #if defined(PETSC_COMPLEX) 197 if (imag(a->a[bs2*k + l*bs + j]) != 0.0) { 198 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 199 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 200 } 201 else { 202 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 203 } 204 #else 205 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 206 #endif 207 } 208 } 209 fprintf(fd,"\n"); 210 } 211 } 212 } 213 fflush(fd); 214 return 0; 215 } 216 217 #undef __FUNC__ 218 #define __FUNC__ "MatView_SeqBAIJ_Draw" 219 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer) 220 { 221 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 222 int row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2; 223 double xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r; 224 Scalar *aa; 225 Draw draw; 226 DrawButton button; 227 PetscTruth isnull; 228 229 ViewerDrawGetDraw(viewer,&draw); 230 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 231 232 xr = a->n; yr = a->m; h = yr/10.0; w = xr/10.0; 233 xr += w; yr += h; xl = -w; yl = -h; 234 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 235 /* loop over matrix elements drawing boxes */ 236 color = DRAW_BLUE; 237 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 238 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 239 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 240 x_l = a->j[j]*bs; x_r = x_l + 1.0; 241 aa = a->a + j*bs2; 242 for ( k=0; k<bs; k++ ) { 243 for ( l=0; l<bs; l++ ) { 244 if (PetscReal(*aa++) >= 0.) continue; 245 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 246 } 247 } 248 } 249 } 250 color = DRAW_CYAN; 251 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 252 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 253 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 254 x_l = a->j[j]*bs; x_r = x_l + 1.0; 255 aa = a->a + j*bs2; 256 for ( k=0; k<bs; k++ ) { 257 for ( l=0; l<bs; l++ ) { 258 if (PetscReal(*aa++) != 0.) continue; 259 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 260 } 261 } 262 } 263 } 264 265 color = DRAW_RED; 266 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 267 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 268 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 269 x_l = a->j[j]*bs; x_r = x_l + 1.0; 270 aa = a->a + j*bs2; 271 for ( k=0; k<bs; k++ ) { 272 for ( l=0; l<bs; l++ ) { 273 if (PetscReal(*aa++) <= 0.) continue; 274 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 275 } 276 } 277 } 278 } 279 280 DrawFlush(draw); 281 DrawGetPause(draw,&pause); 282 if (pause >= 0) { PetscSleep(pause); return 0;} 283 284 /* allow the matrix to zoom or shrink */ 285 ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 286 while (button != BUTTON_RIGHT) { 287 DrawClear(draw); 288 if (button == BUTTON_LEFT) scale = .5; 289 else if (button == BUTTON_CENTER) scale = 2.; 290 xl = scale*(xl + w - xc) + xc - w*scale; 291 xr = scale*(xr - w - xc) + xc + w*scale; 292 yl = scale*(yl + h - yc) + yc - h*scale; 293 yr = scale*(yr - h - yc) + yc + h*scale; 294 w *= scale; h *= scale; 295 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 296 color = DRAW_BLUE; 297 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 298 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 299 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 300 x_l = a->j[j]*bs; x_r = x_l + 1.0; 301 aa = a->a + j*bs2; 302 for ( k=0; k<bs; k++ ) { 303 for ( l=0; l<bs; l++ ) { 304 if (PetscReal(*aa++) >= 0.) continue; 305 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 306 } 307 } 308 } 309 } 310 color = DRAW_CYAN; 311 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 312 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 313 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 314 x_l = a->j[j]*bs; x_r = x_l + 1.0; 315 aa = a->a + j*bs2; 316 for ( k=0; k<bs; k++ ) { 317 for ( l=0; l<bs; l++ ) { 318 if (PetscReal(*aa++) != 0.) continue; 319 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 320 } 321 } 322 } 323 } 324 325 color = DRAW_RED; 326 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 327 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 328 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 329 x_l = a->j[j]*bs; x_r = x_l + 1.0; 330 aa = a->a + j*bs2; 331 for ( k=0; k<bs; k++ ) { 332 for ( l=0; l<bs; l++ ) { 333 if (PetscReal(*aa++) <= 0.) continue; 334 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 335 } 336 } 337 } 338 } 339 ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 340 } 341 return 0; 342 } 343 344 #undef __FUNC__ 345 #define __FUNC__ "MatView_SeqBAIJ" 346 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 347 { 348 Mat A = (Mat) obj; 349 ViewerType vtype; 350 int ierr; 351 352 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 353 if (vtype == MATLAB_VIEWER) { 354 SETERRQ(1,0,"Matlab viewer not supported"); 355 } 356 else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 357 return MatView_SeqBAIJ_ASCII(A,viewer); 358 } 359 else if (vtype == BINARY_FILE_VIEWER) { 360 return MatView_SeqBAIJ_Binary(A,viewer); 361 } 362 else if (vtype == DRAW_VIEWER) { 363 return MatView_SeqBAIJ_Draw(A,viewer); 364 } 365 return 0; 366 } 367 368 #define CHUNKSIZE 10 369 370 #undef __FUNC__ 371 #define __FUNC__ "MatSetValues_SeqBAIJ" 372 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 373 { 374 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 375 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 376 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 377 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 378 int ridx,cidx,bs2=a->bs2; 379 Scalar *ap,value,*aa=a->a,*bap; 380 381 for ( k=0; k<m; k++ ) { /* loop over added rows */ 382 row = im[k]; brow = row/bs; 383 #if defined(PETSC_BOPT_g) 384 if (row < 0) SETERRQ(1,0,"Negative row"); 385 if (row >= a->m) SETERRQ(1,0,"Row too large"); 386 #endif 387 rp = aj + ai[brow]; 388 ap = aa + bs2*ai[brow]; 389 rmax = imax[brow]; 390 nrow = ailen[brow]; 391 low = 0; 392 for ( l=0; l<n; l++ ) { /* loop over added columns */ 393 #if defined(PETSC_BOPT_g) 394 if (in[l] < 0) SETERRQ(1,0,"Negative column"); 395 if (in[l] >= a->n) SETERRQ(1,0,"Column too large"); 396 #endif 397 col = in[l]; bcol = col/bs; 398 ridx = row % bs; cidx = col % bs; 399 if (roworiented) { 400 value = *v++; 401 } 402 else { 403 value = v[k + l*m]; 404 } 405 if (!sorted) low = 0; high = nrow; 406 while (high-low > 7) { 407 t = (low+high)/2; 408 if (rp[t] > bcol) high = t; 409 else low = t; 410 } 411 for ( i=low; i<high; i++ ) { 412 if (rp[i] > bcol) break; 413 if (rp[i] == bcol) { 414 bap = ap + bs2*i + bs*cidx + ridx; 415 if (is == ADD_VALUES) *bap += value; 416 else *bap = value; 417 goto noinsert; 418 } 419 } 420 if (nonew) goto noinsert; 421 if (nrow >= rmax) { 422 /* there is no extra room in row, therefore enlarge */ 423 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 424 Scalar *new_a; 425 426 /* malloc new storage space */ 427 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 428 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 429 new_j = (int *) (new_a + bs2*new_nz); 430 new_i = new_j + new_nz; 431 432 /* copy over old data into new slots */ 433 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 434 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 435 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); 436 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 437 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, 438 len*sizeof(int)); 439 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); 440 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 441 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), 442 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); 443 /* free up old matrix storage */ 444 PetscFree(a->a); 445 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 446 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 447 a->singlemalloc = 1; 448 449 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 450 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 451 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 452 a->maxnz += bs2*CHUNKSIZE; 453 a->reallocs++; 454 a->nz++; 455 } 456 N = nrow++ - 1; 457 /* shift up all the later entries in this row */ 458 for ( ii=N; ii>=i; ii-- ) { 459 rp[ii+1] = rp[ii]; 460 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 461 } 462 if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 463 rp[i] = bcol; 464 ap[bs2*i + bs*cidx + ridx] = value; 465 noinsert:; 466 low = i; 467 } 468 ailen[brow] = nrow; 469 } 470 return 0; 471 } 472 473 #undef __FUNC__ 474 #define __FUNC__ "MatGetSize_SeqBAIJ" 475 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 476 { 477 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 478 *m = a->m; *n = a->n; 479 return 0; 480 } 481 482 #undef __FUNC__ 483 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ" 484 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 485 { 486 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 487 *m = 0; *n = a->m; 488 return 0; 489 } 490 491 #undef __FUNC__ 492 #define __FUNC__ "MatGetRow_SeqBAIJ" 493 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 494 { 495 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 496 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 497 Scalar *aa,*v_i,*aa_i; 498 499 bs = a->bs; 500 ai = a->i; 501 aj = a->j; 502 aa = a->a; 503 bs2 = a->bs2; 504 505 if (row < 0 || row >= a->m) SETERRQ(1,0,"Row out of range"); 506 507 bn = row/bs; /* Block number */ 508 bp = row % bs; /* Block Position */ 509 M = ai[bn+1] - ai[bn]; 510 *nz = bs*M; 511 512 if (v) { 513 *v = 0; 514 if (*nz) { 515 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 516 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 517 v_i = *v + i*bs; 518 aa_i = aa + bs2*(ai[bn] + i); 519 for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 520 } 521 } 522 } 523 524 if (idx) { 525 *idx = 0; 526 if (*nz) { 527 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 528 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 529 idx_i = *idx + i*bs; 530 itmp = bs*aj[ai[bn] + i]; 531 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 532 } 533 } 534 } 535 return 0; 536 } 537 538 #undef __FUNC__ 539 #define __FUNC__ "MatRestoreRow_SeqBAIJ" 540 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 541 { 542 if (idx) {if (*idx) PetscFree(*idx);} 543 if (v) {if (*v) PetscFree(*v);} 544 return 0; 545 } 546 547 #undef __FUNC__ 548 #define __FUNC__ "MatTranspose_SeqBAIJ" 549 static int MatTranspose_SeqBAIJ(Mat A,Mat *B) 550 { 551 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 552 Mat C; 553 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 554 int *rows,*cols,bs2=a->bs2; 555 Scalar *array=a->a; 556 557 if (B==PETSC_NULL && mbs!=nbs) 558 SETERRQ(1,0,"Square matrix only for in-place"); 559 col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 560 PetscMemzero(col,(1+nbs)*sizeof(int)); 561 562 for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 563 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 564 PetscFree(col); 565 rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 566 cols = rows + bs; 567 for ( i=0; i<mbs; i++ ) { 568 cols[0] = i*bs; 569 for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 570 len = ai[i+1] - ai[i]; 571 for ( j=0; j<len; j++ ) { 572 rows[0] = (*aj++)*bs; 573 for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 574 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 575 array += bs2; 576 } 577 } 578 PetscFree(rows); 579 580 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 581 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 582 583 if (B != PETSC_NULL) { 584 *B = C; 585 } else { 586 /* This isn't really an in-place transpose */ 587 PetscFree(a->a); 588 if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 589 if (a->diag) PetscFree(a->diag); 590 if (a->ilen) PetscFree(a->ilen); 591 if (a->imax) PetscFree(a->imax); 592 if (a->solve_work) PetscFree(a->solve_work); 593 PetscFree(a); 594 PetscMemcpy(A,C,sizeof(struct _Mat)); 595 PetscHeaderDestroy(C); 596 } 597 return 0; 598 } 599 600 601 #undef __FUNC__ 602 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ" 603 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 604 { 605 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 606 int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 607 int m = a->m,*ip, N, *ailen = a->ilen; 608 int mbs = a->mbs, bs2 = a->bs2,rmax; 609 Scalar *aa = a->a, *ap; 610 611 if (mode == MAT_FLUSH_ASSEMBLY) return 0; 612 613 rmax = ailen[0]; 614 for ( i=1; i<mbs; i++ ) { 615 /* move each row back by the amount of empty slots (fshift) before it*/ 616 fshift += imax[i-1] - ailen[i-1]; 617 rmax = PetscMax(rmax,ailen[i]); 618 if (fshift) { 619 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 620 N = ailen[i]; 621 for ( j=0; j<N; j++ ) { 622 ip[j-fshift] = ip[j]; 623 PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 624 } 625 } 626 ai[i] = ai[i-1] + ailen[i-1]; 627 } 628 if (mbs) { 629 fshift += imax[mbs-1] - ailen[mbs-1]; 630 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 631 } 632 /* reset ilen and imax for each row */ 633 for ( i=0; i<mbs; i++ ) { 634 ailen[i] = imax[i] = ai[i+1] - ai[i]; 635 } 636 a->nz = ai[mbs]; 637 638 /* diagonals may have moved, so kill the diagonal pointers */ 639 if (fshift && a->diag) { 640 PetscFree(a->diag); 641 PLogObjectMemory(A,-(m+1)*sizeof(int)); 642 a->diag = 0; 643 } 644 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 645 m,a->n,a->bs,fshift*bs2,a->nz*bs2); 646 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 647 a->reallocs); 648 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 649 a->reallocs = 0; 650 A->info.nz_unneeded = (double)fshift*bs2; 651 652 return 0; 653 } 654 655 #undef __FUNC__ 656 #define __FUNC__ "MatZeroEntries_SeqBAIJ" 657 static int MatZeroEntries_SeqBAIJ(Mat A) 658 { 659 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 660 PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar)); 661 return 0; 662 } 663 664 #undef __FUNC__ 665 #define __FUNC__ "MatDestroy_SeqBAIJ" 666 int MatDestroy_SeqBAIJ(PetscObject obj) 667 { 668 Mat A = (Mat) obj; 669 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 670 int ierr; 671 672 #if defined(PETSC_LOG) 673 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 674 #endif 675 PetscFree(a->a); 676 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 677 if (a->diag) PetscFree(a->diag); 678 if (a->ilen) PetscFree(a->ilen); 679 if (a->imax) PetscFree(a->imax); 680 if (a->solve_work) PetscFree(a->solve_work); 681 if (a->mult_work) PetscFree(a->mult_work); 682 PetscFree(a); 683 if (A->mapping) { 684 ierr = ISLocalToGlobalMappingDestroy(A->mapping); CHKERRQ(ierr); 685 } 686 PLogObjectDestroy(A); 687 PetscHeaderDestroy(A); 688 return 0; 689 } 690 691 #undef __FUNC__ 692 #define __FUNC__ "MatSetOption_SeqBAIJ" 693 static int MatSetOption_SeqBAIJ(Mat A,MatOption op) 694 { 695 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 696 if (op == MAT_ROW_ORIENTED) a->roworiented = 1; 697 else if (op == MAT_COLUMN_ORIENTED) a->roworiented = 0; 698 else if (op == MAT_COLUMNS_SORTED) a->sorted = 1; 699 else if (op == MAT_COLUMNS_UNSORTED) a->sorted = 0; 700 else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 701 else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 702 else if (op == MAT_ROWS_SORTED || 703 op == MAT_ROWS_UNSORTED || 704 op == MAT_SYMMETRIC || 705 op == MAT_STRUCTURALLY_SYMMETRIC || 706 op == MAT_YES_NEW_DIAGONALS || 707 op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) 708 PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 709 else if (op == MAT_NO_NEW_DIAGONALS) 710 {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");} 711 else 712 {SETERRQ(PETSC_ERR_SUP,0,"unknown option");} 713 return 0; 714 } 715 716 717 /* -------------------------------------------------------*/ 718 /* Should check that shapes of vectors and matrices match */ 719 /* -------------------------------------------------------*/ 720 #include "pinclude/plapack.h" 721 722 #undef __FUNC__ 723 #define __FUNC__ "MatMult_SeqBAIJ_1" 724 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz) 725 { 726 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 727 register Scalar *x,*z,*v,sum; 728 int mbs=a->mbs,i,*idx,*ii,n; 729 730 VecGetArray_Fast(xx,x); 731 VecGetArray_Fast(zz,z); 732 733 idx = a->j; 734 v = a->a; 735 ii = a->i; 736 737 for ( i=0; i<mbs; i++ ) { 738 n = ii[1] - ii[0]; ii++; 739 sum = 0.0; 740 while (n--) sum += *v++ * x[*idx++]; 741 z[i] = sum; 742 } 743 VecRestoreArray_Fast(xx,x); 744 VecRestoreArray_Fast(zz,z); 745 PLogFlops(2*a->nz - a->m); 746 return 0; 747 } 748 749 #undef __FUNC__ 750 #define __FUNC__ "MatMult_SeqBAIJ_2" 751 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz) 752 { 753 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 754 register Scalar *x,*z,*v,*xb,sum1,sum2; 755 register Scalar x1,x2; 756 int mbs=a->mbs,i,*idx,*ii; 757 int j,n; 758 759 VecGetArray_Fast(xx,x); 760 VecGetArray_Fast(zz,z); 761 762 idx = a->j; 763 v = a->a; 764 ii = a->i; 765 766 for ( i=0; i<mbs; i++ ) { 767 n = ii[1] - ii[0]; ii++; 768 sum1 = 0.0; sum2 = 0.0; 769 for ( j=0; j<n; j++ ) { 770 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 771 sum1 += v[0]*x1 + v[2]*x2; 772 sum2 += v[1]*x1 + v[3]*x2; 773 v += 4; 774 } 775 z[0] = sum1; z[1] = sum2; 776 z += 2; 777 } 778 VecRestoreArray_Fast(xx,x); 779 VecRestoreArray_Fast(zz,z); 780 PLogFlops(4*a->nz - a->m); 781 return 0; 782 } 783 784 #undef __FUNC__ 785 #define __FUNC__ "MatMult_SeqBAIJ_3" 786 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz) 787 { 788 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 789 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 790 int mbs=a->mbs,i,*idx,*ii,j,n; 791 792 VecGetArray_Fast(xx,x); 793 VecGetArray_Fast(zz,z); 794 795 idx = a->j; 796 v = a->a; 797 ii = a->i; 798 799 for ( i=0; i<mbs; i++ ) { 800 n = ii[1] - ii[0]; ii++; 801 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 802 for ( j=0; j<n; j++ ) { 803 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 804 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 805 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 806 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 807 v += 9; 808 } 809 z[0] = sum1; z[1] = sum2; z[2] = sum3; 810 z += 3; 811 } 812 VecRestoreArray_Fast(xx,x); 813 VecRestoreArray_Fast(zz,z); 814 PLogFlops(18*a->nz - a->m); 815 return 0; 816 } 817 818 #undef __FUNC__ 819 #define __FUNC__ "MatMult_SeqBAIJ_4" 820 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz) 821 { 822 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 823 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4; 824 register Scalar x1,x2,x3,x4; 825 int mbs=a->mbs,i,*idx,*ii; 826 int j,n; 827 828 VecGetArray_Fast(xx,x); 829 VecGetArray_Fast(zz,z); 830 831 idx = a->j; 832 v = a->a; 833 ii = a->i; 834 835 for ( i=0; i<mbs; i++ ) { 836 n = ii[1] - ii[0]; ii++; 837 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 838 for ( j=0; j<n; j++ ) { 839 xb = x + 4*(*idx++); 840 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 841 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 842 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 843 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 844 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 845 v += 16; 846 } 847 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 848 z += 4; 849 } 850 VecRestoreArray_Fast(xx,x); 851 VecRestoreArray_Fast(zz,z); 852 PLogFlops(32*a->nz - a->m); 853 return 0; 854 } 855 856 #undef __FUNC__ 857 #define __FUNC__ "MatMult_SeqBAIJ_5" 858 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz) 859 { 860 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 861 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 862 register Scalar x1,x2,x3,x4,x5; 863 int mbs=a->mbs,i,*idx,*ii,j,n; 864 865 VecGetArray_Fast(xx,x); 866 VecGetArray_Fast(zz,z); 867 868 idx = a->j; 869 v = a->a; 870 ii = a->i; 871 872 for ( i=0; i<mbs; i++ ) { 873 n = ii[1] - ii[0]; ii++; 874 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 875 for ( j=0; j<n; j++ ) { 876 xb = x + 5*(*idx++); 877 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 878 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 879 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 880 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 881 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 882 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 883 v += 25; 884 } 885 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 886 z += 5; 887 } 888 VecRestoreArray_Fast(xx,x); 889 VecRestoreArray_Fast(zz,z); 890 PLogFlops(50*a->nz - a->m); 891 return 0; 892 } 893 894 #undef __FUNC__ 895 #define __FUNC__ "MatMult_SeqBAIJ_N" 896 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz) 897 { 898 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 899 register Scalar *x,*z,*v,*xb; 900 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2; 901 int _One = 1,ncols,k; 902 Scalar _DOne = 1.0,*work,*workt,_DZero = 0.0; 903 904 VecGetArray_Fast(xx,x); 905 VecGetArray_Fast(zz,z); 906 907 idx = a->j; 908 v = a->a; 909 ii = a->i; 910 911 912 if (!a->mult_work) { 913 k = PetscMax(a->m,a->n); 914 a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work); 915 } 916 work = a->mult_work; 917 for ( i=0; i<mbs; i++ ) { 918 n = ii[1] - ii[0]; ii++; 919 ncols = n*bs; 920 workt = work; 921 for ( j=0; j<n; j++ ) { 922 xb = x + bs*(*idx++); 923 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 924 workt += bs; 925 } 926 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); 927 v += n*bs2; 928 z += bs; 929 } 930 VecRestoreArray_Fast(xx,x); 931 VecRestoreArray_Fast(zz,z); 932 PLogFlops(2*a->nz*bs2 - a->m); 933 return 0; 934 } 935 936 #undef __FUNC__ 937 #define __FUNC__ "MatMultAdd_SeqBAIJ_1" 938 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 939 { 940 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 941 register Scalar *x,*y,*z,*v,sum; 942 int mbs=a->mbs,i,*idx,*ii,n; 943 944 VecGetArray_Fast(xx,x); 945 VecGetArray_Fast(yy,y); 946 VecGetArray_Fast(zz,z); 947 948 idx = a->j; 949 v = a->a; 950 ii = a->i; 951 952 for ( i=0; i<mbs; i++ ) { 953 n = ii[1] - ii[0]; ii++; 954 sum = y[i]; 955 while (n--) sum += *v++ * x[*idx++]; 956 z[i] = sum; 957 } 958 VecRestoreArray_Fast(xx,x); 959 VecRestoreArray_Fast(yy,y); 960 VecRestoreArray_Fast(zz,z); 961 PLogFlops(2*a->nz); 962 return 0; 963 } 964 965 #undef __FUNC__ 966 #define __FUNC__ "MatMultAdd_SeqBAIJ_2" 967 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 968 { 969 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 970 register Scalar *x,*y,*z,*v,*xb,sum1,sum2; 971 register Scalar x1,x2; 972 int mbs=a->mbs,i,*idx,*ii; 973 int j,n; 974 975 VecGetArray_Fast(xx,x); 976 VecGetArray_Fast(yy,y); 977 VecGetArray_Fast(zz,z); 978 979 idx = a->j; 980 v = a->a; 981 ii = a->i; 982 983 for ( i=0; i<mbs; i++ ) { 984 n = ii[1] - ii[0]; ii++; 985 sum1 = y[0]; sum2 = y[1]; 986 for ( j=0; j<n; j++ ) { 987 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 988 sum1 += v[0]*x1 + v[2]*x2; 989 sum2 += v[1]*x1 + v[3]*x2; 990 v += 4; 991 } 992 z[0] = sum1; z[1] = sum2; 993 z += 2; y += 2; 994 } 995 VecRestoreArray_Fast(xx,x); 996 VecRestoreArray_Fast(yy,y); 997 VecRestoreArray_Fast(zz,z); 998 PLogFlops(4*a->nz); 999 return 0; 1000 } 1001 1002 #undef __FUNC__ 1003 #define __FUNC__ "MatMultAdd_SeqBAIJ_3" 1004 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 1005 { 1006 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1007 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 1008 int mbs=a->mbs,i,*idx,*ii,j,n; 1009 1010 VecGetArray_Fast(xx,x); 1011 VecGetArray_Fast(yy,y); 1012 VecGetArray_Fast(zz,z); 1013 1014 idx = a->j; 1015 v = a->a; 1016 ii = a->i; 1017 1018 for ( i=0; i<mbs; i++ ) { 1019 n = ii[1] - ii[0]; ii++; 1020 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; 1021 for ( j=0; j<n; j++ ) { 1022 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1023 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 1024 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 1025 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 1026 v += 9; 1027 } 1028 z[0] = sum1; z[1] = sum2; z[2] = sum3; 1029 z += 3; y += 3; 1030 } 1031 VecRestoreArray_Fast(xx,x); 1032 VecRestoreArray_Fast(yy,y); 1033 VecRestoreArray_Fast(zz,z); 1034 PLogFlops(18*a->nz); 1035 return 0; 1036 } 1037 1038 #undef __FUNC__ 1039 #define __FUNC__ "MatMultAdd_SeqBAIJ_4" 1040 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 1041 { 1042 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1043 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4; 1044 register Scalar x1,x2,x3,x4; 1045 int mbs=a->mbs,i,*idx,*ii; 1046 int j,n; 1047 1048 VecGetArray_Fast(xx,x); 1049 VecGetArray_Fast(yy,y); 1050 VecGetArray_Fast(zz,z); 1051 1052 idx = a->j; 1053 v = a->a; 1054 ii = a->i; 1055 1056 for ( i=0; i<mbs; i++ ) { 1057 n = ii[1] - ii[0]; ii++; 1058 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; 1059 for ( j=0; j<n; j++ ) { 1060 xb = x + 4*(*idx++); 1061 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1062 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 1063 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 1064 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 1065 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 1066 v += 16; 1067 } 1068 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 1069 z += 4; y += 4; 1070 } 1071 VecRestoreArray_Fast(xx,x); 1072 VecRestoreArray_Fast(yy,y); 1073 VecRestoreArray_Fast(zz,z); 1074 PLogFlops(32*a->nz); 1075 return 0; 1076 } 1077 1078 #undef __FUNC__ 1079 #define __FUNC__ "MatMultAdd_SeqBAIJ_5" 1080 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 1081 { 1082 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1083 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 1084 register Scalar x1,x2,x3,x4,x5; 1085 int mbs=a->mbs,i,*idx,*ii,j,n; 1086 1087 VecGetArray_Fast(xx,x); 1088 VecGetArray_Fast(yy,y); 1089 VecGetArray_Fast(zz,z); 1090 1091 idx = a->j; 1092 v = a->a; 1093 ii = a->i; 1094 1095 for ( i=0; i<mbs; i++ ) { 1096 n = ii[1] - ii[0]; ii++; 1097 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; 1098 for ( j=0; j<n; j++ ) { 1099 xb = x + 5*(*idx++); 1100 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 1101 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1102 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1103 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1104 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1105 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 1106 v += 25; 1107 } 1108 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 1109 z += 5; y += 5; 1110 } 1111 VecRestoreArray_Fast(xx,x); 1112 VecRestoreArray_Fast(yy,y); 1113 VecRestoreArray_Fast(zz,z); 1114 PLogFlops(50*a->nz); 1115 return 0; 1116 } 1117 1118 #undef __FUNC__ 1119 #define __FUNC__ "MatMultAdd_SeqBAIJ_N" 1120 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 1121 { 1122 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1123 register Scalar *x,*z,*v,*xb; 1124 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr; 1125 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1126 1127 if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1128 1129 VecGetArray_Fast(xx,x); 1130 VecGetArray_Fast(zz,z); 1131 1132 idx = a->j; 1133 v = a->a; 1134 ii = a->i; 1135 1136 1137 if (!a->mult_work) { 1138 k = PetscMax(a->m,a->n); 1139 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work); 1140 } 1141 work = a->mult_work; 1142 for ( i=0; i<mbs; i++ ) { 1143 n = ii[1] - ii[0]; ii++; 1144 ncols = n*bs; 1145 workt = work; 1146 for ( j=0; j<n; j++ ) { 1147 xb = x + bs*(*idx++); 1148 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1149 workt += bs; 1150 } 1151 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); 1152 v += n*bs2; 1153 z += bs; 1154 } 1155 VecRestoreArray_Fast(xx,x); 1156 VecRestoreArray_Fast(zz,z); 1157 PLogFlops(2*a->nz*bs2 ); 1158 return 0; 1159 } 1160 1161 #undef __FUNC__ 1162 #define __FUNC__ "MatMultTrans_SeqBAIJ" 1163 static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz) 1164 { 1165 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1166 Scalar *xg,*zg,*zb; 1167 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1168 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1169 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1170 1171 1172 VecGetArray_Fast(xx,xg); x = xg; 1173 VecGetArray_Fast(zz,zg); z = zg; 1174 PetscMemzero(z,N*sizeof(Scalar)); 1175 1176 idx = a->j; 1177 v = a->a; 1178 ii = a->i; 1179 1180 switch (bs) { 1181 case 1: 1182 for ( i=0; i<mbs; i++ ) { 1183 n = ii[1] - ii[0]; ii++; 1184 xb = x + i; x1 = xb[0]; 1185 ib = idx + ai[i]; 1186 for ( j=0; j<n; j++ ) { 1187 rval = ib[j]; 1188 z[rval] += *v++ * x1; 1189 } 1190 } 1191 break; 1192 case 2: 1193 for ( i=0; i<mbs; i++ ) { 1194 n = ii[1] - ii[0]; ii++; 1195 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1196 ib = idx + ai[i]; 1197 for ( j=0; j<n; j++ ) { 1198 rval = ib[j]*2; 1199 z[rval++] += v[0]*x1 + v[1]*x2; 1200 z[rval++] += v[2]*x1 + v[3]*x2; 1201 v += 4; 1202 } 1203 } 1204 break; 1205 case 3: 1206 for ( i=0; i<mbs; i++ ) { 1207 n = ii[1] - ii[0]; ii++; 1208 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1209 ib = idx + ai[i]; 1210 for ( j=0; j<n; j++ ) { 1211 rval = ib[j]*3; 1212 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1213 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1214 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1215 v += 9; 1216 } 1217 } 1218 break; 1219 case 4: 1220 for ( i=0; i<mbs; i++ ) { 1221 n = ii[1] - ii[0]; ii++; 1222 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1223 ib = idx + ai[i]; 1224 for ( j=0; j<n; j++ ) { 1225 rval = ib[j]*4; 1226 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1227 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1228 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1229 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1230 v += 16; 1231 } 1232 } 1233 break; 1234 case 5: 1235 for ( i=0; i<mbs; i++ ) { 1236 n = ii[1] - ii[0]; ii++; 1237 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1238 x4 = xb[3]; x5 = xb[4]; 1239 ib = idx + ai[i]; 1240 for ( j=0; j<n; j++ ) { 1241 rval = ib[j]*5; 1242 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1243 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1244 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1245 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1246 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1247 v += 25; 1248 } 1249 } 1250 break; 1251 /* block sizes larger then 5 by 5 are handled by BLAS */ 1252 default: { 1253 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1254 if (!a->mult_work) { 1255 k = PetscMax(a->m,a->n); 1256 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1257 CHKPTRQ(a->mult_work); 1258 } 1259 work = a->mult_work; 1260 for ( i=0; i<mbs; i++ ) { 1261 n = ii[1] - ii[0]; ii++; 1262 ncols = n*bs; 1263 PetscMemzero(work,ncols*sizeof(Scalar)); 1264 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1265 v += n*bs2; 1266 x += bs; 1267 workt = work; 1268 for ( j=0; j<n; j++ ) { 1269 zb = z + bs*(*idx++); 1270 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1271 workt += bs; 1272 } 1273 } 1274 } 1275 } 1276 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1277 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1278 PLogFlops(2*a->nz*a->bs2 - a->n); 1279 return 0; 1280 } 1281 1282 #undef __FUNC__ 1283 #define __FUNC__ "MatMultTransAdd_SeqBAIJ" 1284 static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1285 { 1286 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1287 Scalar *xg,*zg,*zb; 1288 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1289 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1290 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1291 1292 1293 1294 VecGetArray_Fast(xx,xg); x = xg; 1295 VecGetArray_Fast(zz,zg); z = zg; 1296 1297 if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1298 else PetscMemzero(z,N*sizeof(Scalar)); 1299 1300 1301 idx = a->j; 1302 v = a->a; 1303 ii = a->i; 1304 1305 switch (bs) { 1306 case 1: 1307 for ( i=0; i<mbs; i++ ) { 1308 n = ii[1] - ii[0]; ii++; 1309 xb = x + i; x1 = xb[0]; 1310 ib = idx + ai[i]; 1311 for ( j=0; j<n; j++ ) { 1312 rval = ib[j]; 1313 z[rval] += *v++ * x1; 1314 } 1315 } 1316 break; 1317 case 2: 1318 for ( i=0; i<mbs; i++ ) { 1319 n = ii[1] - ii[0]; ii++; 1320 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1321 ib = idx + ai[i]; 1322 for ( j=0; j<n; j++ ) { 1323 rval = ib[j]*2; 1324 z[rval++] += v[0]*x1 + v[1]*x2; 1325 z[rval++] += v[2]*x1 + v[3]*x2; 1326 v += 4; 1327 } 1328 } 1329 break; 1330 case 3: 1331 for ( i=0; i<mbs; i++ ) { 1332 n = ii[1] - ii[0]; ii++; 1333 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1334 ib = idx + ai[i]; 1335 for ( j=0; j<n; j++ ) { 1336 rval = ib[j]*3; 1337 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1338 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1339 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1340 v += 9; 1341 } 1342 } 1343 break; 1344 case 4: 1345 for ( i=0; i<mbs; i++ ) { 1346 n = ii[1] - ii[0]; ii++; 1347 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1348 ib = idx + ai[i]; 1349 for ( j=0; j<n; j++ ) { 1350 rval = ib[j]*4; 1351 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1352 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1353 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1354 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1355 v += 16; 1356 } 1357 } 1358 break; 1359 case 5: 1360 for ( i=0; i<mbs; i++ ) { 1361 n = ii[1] - ii[0]; ii++; 1362 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1363 x4 = xb[3]; x5 = xb[4]; 1364 ib = idx + ai[i]; 1365 for ( j=0; j<n; j++ ) { 1366 rval = ib[j]*5; 1367 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1368 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1369 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1370 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1371 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1372 v += 25; 1373 } 1374 } 1375 break; 1376 /* block sizes larger then 5 by 5 are handled by BLAS */ 1377 default: { 1378 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1379 if (!a->mult_work) { 1380 k = PetscMax(a->m,a->n); 1381 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1382 CHKPTRQ(a->mult_work); 1383 } 1384 work = a->mult_work; 1385 for ( i=0; i<mbs; i++ ) { 1386 n = ii[1] - ii[0]; ii++; 1387 ncols = n*bs; 1388 PetscMemzero(work,ncols*sizeof(Scalar)); 1389 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1390 v += n*bs2; 1391 x += bs; 1392 workt = work; 1393 for ( j=0; j<n; j++ ) { 1394 zb = z + bs*(*idx++); 1395 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1396 workt += bs; 1397 } 1398 } 1399 } 1400 } 1401 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1402 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1403 PLogFlops(2*a->nz*a->bs2); 1404 return 0; 1405 } 1406 1407 #undef __FUNC__ 1408 #define __FUNC__ "MatGetInfo_SeqBAIJ" 1409 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info) 1410 { 1411 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1412 1413 info->rows_global = (double)a->m; 1414 info->columns_global = (double)a->n; 1415 info->rows_local = (double)a->m; 1416 info->columns_local = (double)a->n; 1417 info->block_size = a->bs2; 1418 info->nz_allocated = a->maxnz; 1419 info->nz_used = a->bs2*a->nz; 1420 info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 1421 /* if (info->nz_unneeded != A->info.nz_unneeded) 1422 printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */ 1423 info->assemblies = A->num_ass; 1424 info->mallocs = a->reallocs; 1425 info->memory = A->mem; 1426 if (A->factor) { 1427 info->fill_ratio_given = A->info.fill_ratio_given; 1428 info->fill_ratio_needed = A->info.fill_ratio_needed; 1429 info->factor_mallocs = A->info.factor_mallocs; 1430 } else { 1431 info->fill_ratio_given = 0; 1432 info->fill_ratio_needed = 0; 1433 info->factor_mallocs = 0; 1434 } 1435 return 0; 1436 } 1437 1438 #undef __FUNC__ 1439 #define __FUNC__ "MatEqual_SeqBAIJ" 1440 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 1441 { 1442 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 1443 1444 if (B->type !=MATSEQBAIJ)SETERRQ(1,0,"Matrices must be same type"); 1445 1446 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1447 if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 1448 (a->nz != b->nz)) { 1449 *flg = PETSC_FALSE; return 0; 1450 } 1451 1452 /* if the a->i are the same */ 1453 if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 1454 *flg = PETSC_FALSE; return 0; 1455 } 1456 1457 /* if a->j are the same */ 1458 if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 1459 *flg = PETSC_FALSE; return 0; 1460 } 1461 1462 /* if a->a are the same */ 1463 if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 1464 *flg = PETSC_FALSE; return 0; 1465 } 1466 *flg = PETSC_TRUE; 1467 return 0; 1468 1469 } 1470 1471 #undef __FUNC__ 1472 #define __FUNC__ "MatGetDiagonal_SeqBAIJ" 1473 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 1474 { 1475 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1476 int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 1477 Scalar *x, zero = 0.0,*aa,*aa_j; 1478 1479 if (A->factor) SETERRQ(1,0,"Not for factored matrix"); 1480 bs = a->bs; 1481 aa = a->a; 1482 ai = a->i; 1483 aj = a->j; 1484 ambs = a->mbs; 1485 bs2 = a->bs2; 1486 1487 VecSet(&zero,v); 1488 VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n); 1489 if (n != a->m) SETERRQ(1,0,"Nonconforming matrix and vector"); 1490 for ( i=0; i<ambs; i++ ) { 1491 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1492 if (aj[j] == i) { 1493 row = i*bs; 1494 aa_j = aa+j*bs2; 1495 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 1496 break; 1497 } 1498 } 1499 } 1500 return 0; 1501 } 1502 1503 #undef __FUNC__ 1504 #define __FUNC__ "MatDiagonalScale_SeqBAIJ" 1505 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 1506 { 1507 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1508 Scalar *l,*r,x,*v,*aa,*li,*ri; 1509 int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 1510 1511 ai = a->i; 1512 aj = a->j; 1513 aa = a->a; 1514 m = a->m; 1515 n = a->n; 1516 bs = a->bs; 1517 mbs = a->mbs; 1518 bs2 = a->bs2; 1519 if (ll) { 1520 VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm); 1521 if (lm != m) SETERRQ(1,0,"Left scaling vector wrong length"); 1522 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1523 M = ai[i+1] - ai[i]; 1524 li = l + i*bs; 1525 v = aa + bs2*ai[i]; 1526 for ( j=0; j<M; j++ ) { /* for each block */ 1527 for ( k=0; k<bs2; k++ ) { 1528 (*v++) *= li[k%bs]; 1529 } 1530 } 1531 } 1532 } 1533 1534 if (rr) { 1535 VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn); 1536 if (rn != n) SETERRQ(1,0,"Right scaling vector wrong length"); 1537 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1538 M = ai[i+1] - ai[i]; 1539 v = aa + bs2*ai[i]; 1540 for ( j=0; j<M; j++ ) { /* for each block */ 1541 ri = r + bs*aj[ai[i]+j]; 1542 for ( k=0; k<bs; k++ ) { 1543 x = ri[k]; 1544 for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 1545 } 1546 } 1547 } 1548 } 1549 return 0; 1550 } 1551 1552 1553 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 1554 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 1555 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1556 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*); 1557 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**); 1558 1559 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 1560 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 1561 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 1562 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 1563 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 1564 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 1565 1566 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 1567 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 1568 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 1569 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 1570 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 1571 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 1572 1573 #undef __FUNC__ 1574 #define __FUNC__ "MatNorm_SeqBAIJ" 1575 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 1576 { 1577 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1578 Scalar *v = a->a; 1579 double sum = 0.0; 1580 int i,nz=a->nz,bs2=a->bs2; 1581 1582 if (type == NORM_FROBENIUS) { 1583 for (i=0; i< bs2*nz; i++ ) { 1584 #if defined(PETSC_COMPLEX) 1585 sum += real(conj(*v)*(*v)); v++; 1586 #else 1587 sum += (*v)*(*v); v++; 1588 #endif 1589 } 1590 *norm = sqrt(sum); 1591 } 1592 else { 1593 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 1594 } 1595 return 0; 1596 } 1597 1598 /* 1599 note: This can only work for identity for row and col. It would 1600 be good to check this and otherwise generate an error. 1601 */ 1602 #undef __FUNC__ 1603 #define __FUNC__ "MatILUFactor_SeqBAIJ" 1604 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 1605 { 1606 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1607 Mat outA; 1608 int ierr; 1609 1610 if (fill != 0) SETERRQ(1,0,"Only fill=0 supported"); 1611 1612 outA = inA; 1613 inA->factor = FACTOR_LU; 1614 a->row = row; 1615 a->col = col; 1616 1617 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1618 1619 if (!a->diag) { 1620 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 1621 } 1622 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 1623 return 0; 1624 } 1625 1626 #undef __FUNC__ 1627 #define __FUNC__ "MatScale_SeqBAIJ" 1628 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 1629 { 1630 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1631 int one = 1, totalnz = a->bs2*a->nz; 1632 BLscal_( &totalnz, alpha, a->a, &one ); 1633 PLogFlops(totalnz); 1634 return 0; 1635 } 1636 1637 #undef __FUNC__ 1638 #define __FUNC__ "MatGetValues_SeqBAIJ" 1639 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 1640 { 1641 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1642 int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 1643 int *ai = a->i, *ailen = a->ilen; 1644 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 1645 Scalar *ap, *aa = a->a, zero = 0.0; 1646 1647 for ( k=0; k<m; k++ ) { /* loop over rows */ 1648 row = im[k]; brow = row/bs; 1649 if (row < 0) SETERRQ(1,0,"Negative row"); 1650 if (row >= a->m) SETERRQ(1,0,"Row too large"); 1651 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1652 nrow = ailen[brow]; 1653 for ( l=0; l<n; l++ ) { /* loop over columns */ 1654 if (in[l] < 0) SETERRQ(1,0,"Negative column"); 1655 if (in[l] >= a->n) SETERRQ(1,0,"Column too large"); 1656 col = in[l] ; 1657 bcol = col/bs; 1658 cidx = col%bs; 1659 ridx = row%bs; 1660 high = nrow; 1661 low = 0; /* assume unsorted */ 1662 while (high-low > 5) { 1663 t = (low+high)/2; 1664 if (rp[t] > bcol) high = t; 1665 else low = t; 1666 } 1667 for ( i=low; i<high; i++ ) { 1668 if (rp[i] > bcol) break; 1669 if (rp[i] == bcol) { 1670 *v++ = ap[bs2*i+bs*cidx+ridx]; 1671 goto finished; 1672 } 1673 } 1674 *v++ = zero; 1675 finished:; 1676 } 1677 } 1678 return 0; 1679 } 1680 1681 #undef __FUNC__ 1682 #define __FUNC__ "MatGetBlockSize_SeqBAIJ" 1683 static int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs) 1684 { 1685 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data; 1686 *bs = baij->bs; 1687 return 0; 1688 } 1689 1690 /* idx should be of length atlease bs */ 1691 #undef __FUNC__ 1692 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Block" 1693 static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg) 1694 { 1695 int i,row; 1696 row = idx[0]; 1697 if (row%bs!=0) { *flg = PETSC_FALSE; return 0; } 1698 1699 for ( i=1; i<bs; i++ ) { 1700 if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; } 1701 } 1702 *flg = PETSC_TRUE; 1703 return 0; 1704 } 1705 1706 #undef __FUNC__ 1707 #define __FUNC__ "MatZeroRows_SeqBAIJ" 1708 static int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag) 1709 { 1710 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1711 IS is_local; 1712 int ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2; 1713 PetscTruth flg; 1714 Scalar zero = 0.0,*aa; 1715 1716 /* Make a copy of the IS and sort it */ 1717 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 1718 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1719 ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr); 1720 ierr = ISSort(is_local); CHKERRQ(ierr); 1721 ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr); 1722 1723 i = 0; 1724 while (i < is_n) { 1725 if (rows[i]<0 || rows[i]>m) SETERRQ(1,0,"row out of range"); 1726 flg = PETSC_FALSE; 1727 if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); } 1728 if (flg) { /* There exists a block of rows to be Zerowed */ 1729 baij->ilen[rows[i]/bs] = 0; 1730 i += bs; 1731 } else { /* Zero out only the requested row */ 1732 count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs; 1733 aa = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs); 1734 for ( j=0; j<count; j++ ) { 1735 aa[0] = zero; 1736 aa+=bs; 1737 } 1738 i++; 1739 } 1740 } 1741 if (diag) { 1742 for ( j=0; j<is_n; j++ ) { 1743 ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1744 } 1745 } 1746 ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr); 1747 ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr); 1748 ierr = ISDestroy(is_local); CHKERRQ(ierr); 1749 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1750 1751 return 0; 1752 } 1753 1754 #undef __FUNC__ 1755 #define __FUNC__ "MatPrintHelp_SeqBAIJ" 1756 int MatPrintHelp_SeqBAIJ(Mat A) 1757 { 1758 static int called = 0; 1759 MPI_Comm comm = A->comm; 1760 1761 if (called) return 0; else called = 1; 1762 PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n"); 1763 PetscPrintf(comm," -mat_block_size <block_size>\n"); 1764 return 0; 1765 } 1766 1767 /* -------------------------------------------------------------------*/ 1768 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 1769 MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 1770 MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N, 1771 MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ, 1772 MatSolve_SeqBAIJ_N,0, 1773 0,0, 1774 MatLUFactor_SeqBAIJ,0, 1775 0, 1776 MatTranspose_SeqBAIJ, 1777 MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 1778 MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 1779 0,MatAssemblyEnd_SeqBAIJ, 1780 0, 1781 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ, 1782 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 1783 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 1784 MatILUFactorSymbolic_SeqBAIJ,0, 1785 0,0, 1786 MatConvertSameType_SeqBAIJ,0,0, 1787 MatILUFactor_SeqBAIJ,0,0, 1788 MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ, 1789 MatGetValues_SeqBAIJ,0, 1790 MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 1791 0,0,0,MatGetBlockSize_SeqBAIJ, 1792 MatGetRowIJ_SeqBAIJ, 1793 MatRestoreRowIJ_SeqBAIJ}; 1794 1795 #undef __FUNC__ 1796 #define __FUNC__ "MatCreateSeqBAIJ" 1797 /*@C 1798 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1799 compressed row) format. For good matrix assembly performance the 1800 user should preallocate the matrix storage by setting the parameter nz 1801 (or the array nzz). By setting these parameters accurately, performance 1802 during matrix assembly can be increased by more than a factor of 50. 1803 1804 Input Parameters: 1805 . comm - MPI communicator, set to MPI_COMM_SELF 1806 . bs - size of block 1807 . m - number of rows 1808 . n - number of columns 1809 . nz - number of block nonzeros per block row (same for all rows) 1810 . nzz - array containing the number of block nonzeros in the various block rows 1811 (possibly different for each block row) or PETSC_NULL 1812 1813 Output Parameter: 1814 . A - the matrix 1815 1816 Options Database Keys: 1817 $ -mat_no_unroll - uses code that does not unroll the loops in the 1818 $ block calculations (much solver) 1819 $ -mat_block_size - size of the blocks to use 1820 1821 Notes: 1822 The block AIJ format is fully compatible with standard Fortran 77 1823 storage. That is, the stored row and column indices can begin at 1824 either one (as in Fortran) or zero. See the users' manual for details. 1825 1826 Specify the preallocated storage with either nz or nnz (not both). 1827 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1828 allocation. For additional details, see the users manual chapter on 1829 matrices. 1830 1831 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1832 @*/ 1833 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 1834 { 1835 Mat B; 1836 Mat_SeqBAIJ *b; 1837 int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size; 1838 1839 MPI_Comm_size(comm,&size); 1840 if (size > 1) SETERRQ(1,0,"Comm must be of size 1"); 1841 1842 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr); 1843 1844 if (mbs*bs!=m || nbs*bs!=n) 1845 SETERRQ(1,0,"Number rows, cols must be divisible by blocksize"); 1846 1847 *A = 0; 1848 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 1849 PLogObjectCreate(B); 1850 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 1851 PetscMemzero(b,sizeof(Mat_SeqBAIJ)); 1852 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 1853 ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr); 1854 if (!flg) { 1855 switch (bs) { 1856 case 1: 1857 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1858 B->ops.solve = MatSolve_SeqBAIJ_1; 1859 B->ops.mult = MatMult_SeqBAIJ_1; 1860 B->ops.multadd = MatMultAdd_SeqBAIJ_1; 1861 break; 1862 case 2: 1863 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1864 B->ops.solve = MatSolve_SeqBAIJ_2; 1865 B->ops.mult = MatMult_SeqBAIJ_2; 1866 B->ops.multadd = MatMultAdd_SeqBAIJ_2; 1867 break; 1868 case 3: 1869 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1870 B->ops.solve = MatSolve_SeqBAIJ_3; 1871 B->ops.mult = MatMult_SeqBAIJ_3; 1872 B->ops.multadd = MatMultAdd_SeqBAIJ_3; 1873 break; 1874 case 4: 1875 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1876 B->ops.solve = MatSolve_SeqBAIJ_4; 1877 B->ops.mult = MatMult_SeqBAIJ_4; 1878 B->ops.multadd = MatMultAdd_SeqBAIJ_4; 1879 break; 1880 case 5: 1881 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1882 B->ops.solve = MatSolve_SeqBAIJ_5; 1883 B->ops.mult = MatMult_SeqBAIJ_5; 1884 B->ops.multadd = MatMultAdd_SeqBAIJ_5; 1885 break; 1886 } 1887 } 1888 B->destroy = MatDestroy_SeqBAIJ; 1889 B->view = MatView_SeqBAIJ; 1890 B->factor = 0; 1891 B->lupivotthreshold = 1.0; 1892 B->mapping = 0; 1893 b->row = 0; 1894 b->col = 0; 1895 b->reallocs = 0; 1896 1897 b->m = m; B->m = m; B->M = m; 1898 b->n = n; B->n = n; B->N = n; 1899 b->mbs = mbs; 1900 b->nbs = nbs; 1901 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 1902 if (nnz == PETSC_NULL) { 1903 if (nz == PETSC_DEFAULT) nz = 5; 1904 else if (nz <= 0) nz = 1; 1905 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 1906 nz = nz*mbs; 1907 } 1908 else { 1909 nz = 0; 1910 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 1911 } 1912 1913 /* allocate the matrix space */ 1914 len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 1915 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 1916 PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 1917 b->j = (int *) (b->a + nz*bs2); 1918 PetscMemzero(b->j,nz*sizeof(int)); 1919 b->i = b->j + nz; 1920 b->singlemalloc = 1; 1921 1922 b->i[0] = 0; 1923 for (i=1; i<mbs+1; i++) { 1924 b->i[i] = b->i[i-1] + b->imax[i-1]; 1925 } 1926 1927 /* b->ilen will count nonzeros in each block row so far. */ 1928 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 1929 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1930 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 1931 1932 b->bs = bs; 1933 b->bs2 = bs2; 1934 b->mbs = mbs; 1935 b->nz = 0; 1936 b->maxnz = nz*bs2; 1937 b->sorted = 0; 1938 b->roworiented = 1; 1939 b->nonew = 0; 1940 b->diag = 0; 1941 b->solve_work = 0; 1942 b->mult_work = 0; 1943 b->spptr = 0; 1944 B->info.nz_unneeded = (double)b->maxnz; 1945 1946 *A = B; 1947 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 1948 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 1949 return 0; 1950 } 1951 1952 #undef __FUNC__ 1953 #define __FUNC__ "MatConvertSameType_SeqBAIJ" 1954 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 1955 { 1956 Mat C; 1957 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 1958 int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1959 1960 if (a->i[mbs] != nz) SETERRQ(1,0,"Corrupt matrix"); 1961 1962 *B = 0; 1963 PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 1964 PLogObjectCreate(C); 1965 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 1966 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 1967 C->destroy = MatDestroy_SeqBAIJ; 1968 C->view = MatView_SeqBAIJ; 1969 C->factor = A->factor; 1970 c->row = 0; 1971 c->col = 0; 1972 C->assembled = PETSC_TRUE; 1973 1974 c->m = C->m = a->m; 1975 c->n = C->n = a->n; 1976 C->M = a->m; 1977 C->N = a->n; 1978 1979 c->bs = a->bs; 1980 c->bs2 = a->bs2; 1981 c->mbs = a->mbs; 1982 c->nbs = a->nbs; 1983 1984 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 1985 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 1986 for ( i=0; i<mbs; i++ ) { 1987 c->imax[i] = a->imax[i]; 1988 c->ilen[i] = a->ilen[i]; 1989 } 1990 1991 /* allocate the matrix space */ 1992 c->singlemalloc = 1; 1993 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 1994 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 1995 c->j = (int *) (c->a + nz*bs2); 1996 c->i = c->j + nz; 1997 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 1998 if (mbs > 0) { 1999 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 2000 if (cpvalues == COPY_VALUES) { 2001 PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 2002 } 2003 } 2004 2005 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 2006 c->sorted = a->sorted; 2007 c->roworiented = a->roworiented; 2008 c->nonew = a->nonew; 2009 2010 if (a->diag) { 2011 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 2012 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 2013 for ( i=0; i<mbs; i++ ) { 2014 c->diag[i] = a->diag[i]; 2015 } 2016 } 2017 else c->diag = 0; 2018 c->nz = a->nz; 2019 c->maxnz = a->maxnz; 2020 c->solve_work = 0; 2021 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 2022 c->mult_work = 0; 2023 *B = C; 2024 return 0; 2025 } 2026 2027 #undef __FUNC__ 2028 #define __FUNC__ "MatLoad_SeqBAIJ" 2029 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 2030 { 2031 Mat_SeqBAIJ *a; 2032 Mat B; 2033 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 2034 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2035 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 2036 int *masked, nmask,tmp,bs2,ishift; 2037 Scalar *aa; 2038 MPI_Comm comm = ((PetscObject) viewer)->comm; 2039 2040 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2041 bs2 = bs*bs; 2042 2043 MPI_Comm_size(comm,&size); 2044 if (size > 1) SETERRQ(1,0,"view must have one processor"); 2045 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2046 ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 2047 if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not Mat object"); 2048 M = header[1]; N = header[2]; nz = header[3]; 2049 2050 if (M != N) SETERRQ(1,0,"Can only do square matrices"); 2051 2052 /* 2053 This code adds extra rows to make sure the number of rows is 2054 divisible by the blocksize 2055 */ 2056 mbs = M/bs; 2057 extra_rows = bs - M + bs*(mbs); 2058 if (extra_rows == bs) extra_rows = 0; 2059 else mbs++; 2060 if (extra_rows) { 2061 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 2062 } 2063 2064 /* read in row lengths */ 2065 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 2066 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 2067 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 2068 2069 /* read in column indices */ 2070 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 2071 ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 2072 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 2073 2074 /* loop over row lengths determining block row lengths */ 2075 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 2076 PetscMemzero(browlengths,mbs*sizeof(int)); 2077 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 2078 PetscMemzero(mask,mbs*sizeof(int)); 2079 masked = mask + mbs; 2080 rowcount = 0; nzcount = 0; 2081 for ( i=0; i<mbs; i++ ) { 2082 nmask = 0; 2083 for ( j=0; j<bs; j++ ) { 2084 kmax = rowlengths[rowcount]; 2085 for ( k=0; k<kmax; k++ ) { 2086 tmp = jj[nzcount++]/bs; 2087 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2088 } 2089 rowcount++; 2090 } 2091 browlengths[i] += nmask; 2092 /* zero out the mask elements we set */ 2093 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2094 } 2095 2096 /* create our matrix */ 2097 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 2098 CHKERRQ(ierr); 2099 B = *A; 2100 a = (Mat_SeqBAIJ *) B->data; 2101 2102 /* set matrix "i" values */ 2103 a->i[0] = 0; 2104 for ( i=1; i<= mbs; i++ ) { 2105 a->i[i] = a->i[i-1] + browlengths[i-1]; 2106 a->ilen[i-1] = browlengths[i-1]; 2107 } 2108 a->nz = 0; 2109 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 2110 2111 /* read in nonzero values */ 2112 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 2113 ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 2114 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 2115 2116 /* set "a" and "j" values into matrix */ 2117 nzcount = 0; jcount = 0; 2118 for ( i=0; i<mbs; i++ ) { 2119 nzcountb = nzcount; 2120 nmask = 0; 2121 for ( j=0; j<bs; j++ ) { 2122 kmax = rowlengths[i*bs+j]; 2123 for ( k=0; k<kmax; k++ ) { 2124 tmp = jj[nzcount++]/bs; 2125 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2126 } 2127 rowcount++; 2128 } 2129 /* sort the masked values */ 2130 PetscSortInt(nmask,masked); 2131 2132 /* set "j" values into matrix */ 2133 maskcount = 1; 2134 for ( j=0; j<nmask; j++ ) { 2135 a->j[jcount++] = masked[j]; 2136 mask[masked[j]] = maskcount++; 2137 } 2138 /* set "a" values into matrix */ 2139 ishift = bs2*a->i[i]; 2140 for ( j=0; j<bs; j++ ) { 2141 kmax = rowlengths[i*bs+j]; 2142 for ( k=0; k<kmax; k++ ) { 2143 tmp = jj[nzcountb]/bs ; 2144 block = mask[tmp] - 1; 2145 point = jj[nzcountb] - bs*tmp; 2146 idx = ishift + bs2*block + j + bs*point; 2147 a->a[idx] = aa[nzcountb++]; 2148 } 2149 } 2150 /* zero out the mask elements we set */ 2151 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2152 } 2153 if (jcount != a->nz) SETERRQ(1,0,"Error bad binary matrix"); 2154 2155 PetscFree(rowlengths); 2156 PetscFree(browlengths); 2157 PetscFree(aa); 2158 PetscFree(jj); 2159 PetscFree(mask); 2160 2161 B->assembled = PETSC_TRUE; 2162 2163 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 2164 if (flg) { 2165 Viewer tviewer; 2166 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2167 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO,0);CHKERRQ(ierr); 2168 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2169 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2170 } 2171 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 2172 if (flg) { 2173 Viewer tviewer; 2174 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2175 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO_LONG,0);CHKERRQ(ierr); 2176 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2177 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2178 } 2179 ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 2180 if (flg) { 2181 Viewer tviewer; 2182 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2183 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2184 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2185 } 2186 ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 2187 if (flg) { 2188 Viewer tviewer; 2189 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2190 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_MATLAB,"M");CHKERRQ(ierr); 2191 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2192 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2193 } 2194 ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 2195 if (flg) { 2196 Viewer tviewer; 2197 ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 2198 ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 2199 ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 2200 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2201 } 2202 return 0; 2203 } 2204 2205 2206 2207