1 #ifndef lint 2 static char vcid[] = "$Id: baij.c,v 1.78 1996/12/02 21:31:10 bsmith Exp balay $"; 3 #endif 4 5 /* 6 Defines the basic matrix operations for the BAIJ (compressed row) 7 matrix storage format. 8 */ 9 #include "src/mat/impls/baij/seq/baij.h" 10 #include "src/vec/vecimpl.h" 11 #include "src/inline/spops.h" 12 #include "petsc.h" 13 14 15 /* 16 Adds diagonal pointers to sparse matrix structure. 17 */ 18 19 #undef __FUNCTION__ 20 #define __FUNCTION__ "MatMarkDiag_SeqBAIJ" 21 int MatMarkDiag_SeqBAIJ(Mat A) 22 { 23 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 24 int i,j, *diag, m = a->mbs; 25 26 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 27 PLogObjectMemory(A,(m+1)*sizeof(int)); 28 for ( i=0; i<m; i++ ) { 29 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 30 if (a->j[j] == i) { 31 diag[i] = j; 32 break; 33 } 34 } 35 } 36 a->diag = diag; 37 return 0; 38 } 39 40 #include "draw.h" 41 #include "pinclude/pviewer.h" 42 #include "sys.h" 43 44 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 45 46 #undef __FUNCTION__ 47 #define __FUNCTION__ "MatGetRowIJ_SeqBAIJ" 48 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 49 PetscTruth *done) 50 { 51 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 52 int ierr, n = a->mbs,i; 53 54 *nn = n; 55 if (!ia) return 0; 56 if (symmetric) { 57 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 58 } else if (oshift == 1) { 59 /* temporarily add 1 to i and j indices */ 60 int nz = a->i[n] + 1; 61 for ( i=0; i<nz; i++ ) a->j[i]++; 62 for ( i=0; i<n+1; i++ ) a->i[i]++; 63 *ia = a->i; *ja = a->j; 64 } else { 65 *ia = a->i; *ja = a->j; 66 } 67 68 return 0; 69 } 70 71 #undef __FUNCTION__ 72 #define __FUNCTION__ "MatRestoreRowIJ_SeqBAIJ" 73 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 74 PetscTruth *done) 75 { 76 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 77 int i,n = a->mbs; 78 79 if (!ia) return 0; 80 if (symmetric) { 81 PetscFree(*ia); 82 PetscFree(*ja); 83 } else if (oshift == 1) { 84 int nz = a->i[n]; 85 for ( i=0; i<nz; i++ ) a->j[i]--; 86 for ( i=0; i<n+1; i++ ) a->i[i]--; 87 } 88 return 0; 89 } 90 91 92 #undef __FUNCTION__ 93 #define __FUNCTION__ "MatView_SeqBAIJ_Binary" 94 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 95 { 96 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 97 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 98 Scalar *aa; 99 100 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 101 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 102 col_lens[0] = MAT_COOKIE; 103 104 col_lens[1] = a->m; 105 col_lens[2] = a->n; 106 col_lens[3] = a->nz*bs2; 107 108 /* store lengths of each row and write (including header) to file */ 109 count = 0; 110 for ( i=0; i<a->mbs; i++ ) { 111 for ( j=0; j<bs; j++ ) { 112 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 113 } 114 } 115 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 116 PetscFree(col_lens); 117 118 /* store column indices (zero start index) */ 119 jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj); 120 count = 0; 121 for ( i=0; i<a->mbs; i++ ) { 122 for ( j=0; j<bs; j++ ) { 123 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 124 for ( l=0; l<bs; l++ ) { 125 jj[count++] = bs*a->j[k] + l; 126 } 127 } 128 } 129 } 130 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr); 131 PetscFree(jj); 132 133 /* store nonzero values */ 134 aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa); 135 count = 0; 136 for ( i=0; i<a->mbs; i++ ) { 137 for ( j=0; j<bs; j++ ) { 138 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 139 for ( l=0; l<bs; l++ ) { 140 aa[count++] = a->a[bs2*k + l*bs + j]; 141 } 142 } 143 } 144 } 145 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 146 PetscFree(aa); 147 return 0; 148 } 149 150 #undef __FUNCTION__ 151 #define __FUNCTION__ "MatView_SeqBAIJ_ASCII" 152 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 153 { 154 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 155 int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 156 FILE *fd; 157 char *outputname; 158 159 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 160 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 161 ierr = ViewerGetFormat(viewer,&format); 162 if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) { 163 fprintf(fd," block size is %d\n",bs); 164 } 165 else if (format == VIEWER_FORMAT_ASCII_MATLAB) { 166 SETERRQ(1,"MatView_SeqBAIJ_ASCII:Matlab format not supported"); 167 } 168 else if (format == VIEWER_FORMAT_ASCII_COMMON) { 169 for ( i=0; i<a->mbs; i++ ) { 170 for ( j=0; j<bs; j++ ) { 171 fprintf(fd,"row %d:",i*bs+j); 172 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 173 for ( l=0; l<bs; l++ ) { 174 #if defined(PETSC_COMPLEX) 175 if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 176 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 177 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 178 else if (real(a->a[bs2*k + l*bs + j]) != 0.0) 179 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 180 #else 181 if (a->a[bs2*k + l*bs + j] != 0.0) 182 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 183 #endif 184 } 185 } 186 fprintf(fd,"\n"); 187 } 188 } 189 } 190 else { 191 for ( i=0; i<a->mbs; i++ ) { 192 for ( j=0; j<bs; j++ ) { 193 fprintf(fd,"row %d:",i*bs+j); 194 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 195 for ( l=0; l<bs; l++ ) { 196 #if defined(PETSC_COMPLEX) 197 if (imag(a->a[bs2*k + l*bs + j]) != 0.0) { 198 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 199 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 200 } 201 else { 202 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 203 } 204 #else 205 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 206 #endif 207 } 208 } 209 fprintf(fd,"\n"); 210 } 211 } 212 } 213 fflush(fd); 214 return 0; 215 } 216 217 #undef __FUNCTION__ 218 #define __FUNCTION__ "MatView_SeqBAIJ_Draw" 219 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer) 220 { 221 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 222 int row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2; 223 double xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r; 224 Scalar *aa; 225 Draw draw; 226 DrawButton button; 227 PetscTruth isnull; 228 229 ViewerDrawGetDraw(viewer,&draw); 230 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 231 232 xr = a->n; yr = a->m; h = yr/10.0; w = xr/10.0; 233 xr += w; yr += h; xl = -w; yl = -h; 234 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 235 /* loop over matrix elements drawing boxes */ 236 color = DRAW_BLUE; 237 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 238 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 239 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 240 x_l = a->j[j]*bs; x_r = x_l + 1.0; 241 aa = a->a + j*bs2; 242 for ( k=0; k<bs; k++ ) { 243 for ( l=0; l<bs; l++ ) { 244 if (PetscReal(*aa++) >= 0.) continue; 245 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 246 } 247 } 248 } 249 } 250 color = DRAW_CYAN; 251 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 252 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 253 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 254 x_l = a->j[j]*bs; x_r = x_l + 1.0; 255 aa = a->a + j*bs2; 256 for ( k=0; k<bs; k++ ) { 257 for ( l=0; l<bs; l++ ) { 258 if (PetscReal(*aa++) != 0.) continue; 259 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 260 } 261 } 262 } 263 } 264 265 color = DRAW_RED; 266 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 267 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 268 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 269 x_l = a->j[j]*bs; x_r = x_l + 1.0; 270 aa = a->a + j*bs2; 271 for ( k=0; k<bs; k++ ) { 272 for ( l=0; l<bs; l++ ) { 273 if (PetscReal(*aa++) <= 0.) continue; 274 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 275 } 276 } 277 } 278 } 279 280 DrawFlush(draw); 281 DrawGetPause(draw,&pause); 282 if (pause >= 0) { PetscSleep(pause); return 0;} 283 284 /* allow the matrix to zoom or shrink */ 285 ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 286 while (button != BUTTON_RIGHT) { 287 DrawClear(draw); 288 if (button == BUTTON_LEFT) scale = .5; 289 else if (button == BUTTON_CENTER) scale = 2.; 290 xl = scale*(xl + w - xc) + xc - w*scale; 291 xr = scale*(xr - w - xc) + xc + w*scale; 292 yl = scale*(yl + h - yc) + yc - h*scale; 293 yr = scale*(yr - h - yc) + yc + h*scale; 294 w *= scale; h *= scale; 295 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 296 color = DRAW_BLUE; 297 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 298 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 299 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 300 x_l = a->j[j]*bs; x_r = x_l + 1.0; 301 aa = a->a + j*bs2; 302 for ( k=0; k<bs; k++ ) { 303 for ( l=0; l<bs; l++ ) { 304 if (PetscReal(*aa++) >= 0.) continue; 305 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 306 } 307 } 308 } 309 } 310 color = DRAW_CYAN; 311 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 312 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 313 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 314 x_l = a->j[j]*bs; x_r = x_l + 1.0; 315 aa = a->a + j*bs2; 316 for ( k=0; k<bs; k++ ) { 317 for ( l=0; l<bs; l++ ) { 318 if (PetscReal(*aa++) != 0.) continue; 319 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 320 } 321 } 322 } 323 } 324 325 color = DRAW_RED; 326 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 327 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 328 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 329 x_l = a->j[j]*bs; x_r = x_l + 1.0; 330 aa = a->a + j*bs2; 331 for ( k=0; k<bs; k++ ) { 332 for ( l=0; l<bs; l++ ) { 333 if (PetscReal(*aa++) <= 0.) continue; 334 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 335 } 336 } 337 } 338 } 339 ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 340 } 341 return 0; 342 } 343 344 #undef __FUNCTION__ 345 #define __FUNCTION__ "MatView_SeqBAIJ" 346 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 347 { 348 Mat A = (Mat) obj; 349 ViewerType vtype; 350 int ierr; 351 352 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 353 if (vtype == MATLAB_VIEWER) { 354 SETERRQ(1,"MatView_SeqBAIJ:Matlab viewer not supported"); 355 } 356 else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 357 return MatView_SeqBAIJ_ASCII(A,viewer); 358 } 359 else if (vtype == BINARY_FILE_VIEWER) { 360 return MatView_SeqBAIJ_Binary(A,viewer); 361 } 362 else if (vtype == DRAW_VIEWER) { 363 return MatView_SeqBAIJ_Draw(A,viewer); 364 } 365 return 0; 366 } 367 368 #define CHUNKSIZE 10 369 370 #undef __FUNCTION__ 371 #define __FUNCTION__ "MatSetValues_SeqBAIJ" 372 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 373 { 374 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 375 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 376 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 377 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 378 int ridx,cidx,bs2=a->bs2; 379 Scalar *ap,value,*aa=a->a,*bap; 380 381 for ( k=0; k<m; k++ ) { /* loop over added rows */ 382 row = im[k]; brow = row/bs; 383 #if defined(PETSC_BOPT_g) 384 if (row < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative row"); 385 if (row >= a->m) SETERRQ(1,"MatSetValues_SeqBAIJ:Row too large"); 386 #endif 387 rp = aj + ai[brow]; 388 ap = aa + bs2*ai[brow]; 389 rmax = imax[brow]; 390 nrow = ailen[brow]; 391 low = 0; 392 for ( l=0; l<n; l++ ) { /* loop over added columns */ 393 #if defined(PETSC_BOPT_g) 394 if (in[l] < 0) SETERRQ(1,"MatSetValues_SeqBAIJ:Negative column"); 395 if (in[l] >= a->n) SETERRQ(1,"MatSetValues_SeqBAIJ:Column too large"); 396 #endif 397 col = in[l]; bcol = col/bs; 398 ridx = row % bs; cidx = col % bs; 399 if (roworiented) { 400 value = *v++; 401 } 402 else { 403 value = v[k + l*m]; 404 } 405 if (!sorted) low = 0; high = nrow; 406 while (high-low > 7) { 407 t = (low+high)/2; 408 if (rp[t] > bcol) high = t; 409 else low = t; 410 } 411 for ( i=low; i<high; i++ ) { 412 if (rp[i] > bcol) break; 413 if (rp[i] == bcol) { 414 bap = ap + bs2*i + bs*cidx + ridx; 415 if (is == ADD_VALUES) *bap += value; 416 else *bap = value; 417 goto noinsert; 418 } 419 } 420 if (nonew) goto noinsert; 421 if (nrow >= rmax) { 422 /* there is no extra room in row, therefore enlarge */ 423 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 424 Scalar *new_a; 425 426 /* malloc new storage space */ 427 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 428 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 429 new_j = (int *) (new_a + bs2*new_nz); 430 new_i = new_j + new_nz; 431 432 /* copy over old data into new slots */ 433 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 434 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 435 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); 436 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 437 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, 438 len*sizeof(int)); 439 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); 440 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 441 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), 442 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); 443 /* free up old matrix storage */ 444 PetscFree(a->a); 445 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 446 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 447 a->singlemalloc = 1; 448 449 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 450 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 451 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 452 a->maxnz += bs2*CHUNKSIZE; 453 a->reallocs++; 454 a->nz++; 455 } 456 N = nrow++ - 1; 457 /* shift up all the later entries in this row */ 458 for ( ii=N; ii>=i; ii-- ) { 459 rp[ii+1] = rp[ii]; 460 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 461 } 462 if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 463 rp[i] = bcol; 464 ap[bs2*i + bs*cidx + ridx] = value; 465 noinsert:; 466 low = i; 467 } 468 ailen[brow] = nrow; 469 } 470 return 0; 471 } 472 473 #undef __FUNCTION__ 474 #define __FUNCTION__ "MatGetSize_SeqBAIJ" 475 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 476 { 477 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 478 *m = a->m; *n = a->n; 479 return 0; 480 } 481 482 #undef __FUNCTION__ 483 #define __FUNCTION__ "MatGetOwnershipRange_SeqBAIJ" 484 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 485 { 486 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 487 *m = 0; *n = a->m; 488 return 0; 489 } 490 491 #undef __FUNCTION__ 492 #define __FUNCTION__ "MatGetRow_SeqBAIJ" 493 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 494 { 495 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 496 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 497 Scalar *aa,*v_i,*aa_i; 498 499 bs = a->bs; 500 ai = a->i; 501 aj = a->j; 502 aa = a->a; 503 bs2 = a->bs2; 504 505 if (row < 0 || row >= a->m) SETERRQ(1,"MatGetRow_SeqBAIJ:Row out of range"); 506 507 bn = row/bs; /* Block number */ 508 bp = row % bs; /* Block Position */ 509 M = ai[bn+1] - ai[bn]; 510 *nz = bs*M; 511 512 if (v) { 513 *v = 0; 514 if (*nz) { 515 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 516 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 517 v_i = *v + i*bs; 518 aa_i = aa + bs2*(ai[bn] + i); 519 for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 520 } 521 } 522 } 523 524 if (idx) { 525 *idx = 0; 526 if (*nz) { 527 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 528 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 529 idx_i = *idx + i*bs; 530 itmp = bs*aj[ai[bn] + i]; 531 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 532 } 533 } 534 } 535 return 0; 536 } 537 538 #undef __FUNCTION__ 539 #define __FUNCTION__ "MatRestoreRow_SeqBAIJ" 540 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 541 { 542 if (idx) {if (*idx) PetscFree(*idx);} 543 if (v) {if (*v) PetscFree(*v);} 544 return 0; 545 } 546 547 #undef __FUNCTION__ 548 #define __FUNCTION__ "MatTranspose_SeqBAIJ" 549 static int MatTranspose_SeqBAIJ(Mat A,Mat *B) 550 { 551 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 552 Mat C; 553 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 554 int *rows,*cols,bs2=a->bs2; 555 Scalar *array=a->a; 556 557 if (B==PETSC_NULL && mbs!=nbs) 558 SETERRQ(1,"MatTranspose_SeqBAIJ:Square matrix only for in-place"); 559 col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 560 PetscMemzero(col,(1+nbs)*sizeof(int)); 561 562 for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 563 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 564 PetscFree(col); 565 rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 566 cols = rows + bs; 567 for ( i=0; i<mbs; i++ ) { 568 cols[0] = i*bs; 569 for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 570 len = ai[i+1] - ai[i]; 571 for ( j=0; j<len; j++ ) { 572 rows[0] = (*aj++)*bs; 573 for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 574 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 575 array += bs2; 576 } 577 } 578 PetscFree(rows); 579 580 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 581 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 582 583 if (B != PETSC_NULL) { 584 *B = C; 585 } else { 586 /* This isn't really an in-place transpose */ 587 PetscFree(a->a); 588 if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 589 if (a->diag) PetscFree(a->diag); 590 if (a->ilen) PetscFree(a->ilen); 591 if (a->imax) PetscFree(a->imax); 592 if (a->solve_work) PetscFree(a->solve_work); 593 PetscFree(a); 594 PetscMemcpy(A,C,sizeof(struct _Mat)); 595 PetscHeaderDestroy(C); 596 } 597 return 0; 598 } 599 600 601 #undef __FUNCTION__ 602 #define __FUNCTION__ "MatAssemblyEnd_SeqBAIJ" 603 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 604 { 605 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 606 int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 607 int m = a->m,*ip, N, *ailen = a->ilen; 608 int mbs = a->mbs, bs2 = a->bs2; 609 Scalar *aa = a->a, *ap; 610 611 if (mode == MAT_FLUSH_ASSEMBLY) return 0; 612 613 for ( i=1; i<mbs; i++ ) { 614 /* move each row back by the amount of empty slots (fshift) before it*/ 615 fshift += imax[i-1] - ailen[i-1]; 616 if (fshift) { 617 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 618 N = ailen[i]; 619 for ( j=0; j<N; j++ ) { 620 ip[j-fshift] = ip[j]; 621 PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 622 } 623 } 624 ai[i] = ai[i-1] + ailen[i-1]; 625 } 626 if (mbs) { 627 fshift += imax[mbs-1] - ailen[mbs-1]; 628 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 629 } 630 /* reset ilen and imax for each row */ 631 for ( i=0; i<mbs; i++ ) { 632 ailen[i] = imax[i] = ai[i+1] - ai[i]; 633 } 634 a->nz = ai[mbs]; 635 636 /* diagonals may have moved, so kill the diagonal pointers */ 637 if (fshift && a->diag) { 638 PetscFree(a->diag); 639 PLogObjectMemory(A,-(m+1)*sizeof(int)); 640 a->diag = 0; 641 } 642 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 643 m,a->n,a->bs,fshift*bs2,a->nz*bs2); 644 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 645 a->reallocs); 646 a->reallocs = 0; 647 A->info.nz_unneeded = (double)fshift*bs2; 648 649 return 0; 650 } 651 652 #undef __FUNCTION__ 653 #define __FUNCTION__ "MatZeroEntries_SeqBAIJ" 654 static int MatZeroEntries_SeqBAIJ(Mat A) 655 { 656 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 657 PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar)); 658 return 0; 659 } 660 661 #undef __FUNCTION__ 662 #define __FUNCTION__ "MatDestroy_SeqBAIJ" 663 int MatDestroy_SeqBAIJ(PetscObject obj) 664 { 665 Mat A = (Mat) obj; 666 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 667 int ierr; 668 669 #if defined(PETSC_LOG) 670 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 671 #endif 672 PetscFree(a->a); 673 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 674 if (a->diag) PetscFree(a->diag); 675 if (a->ilen) PetscFree(a->ilen); 676 if (a->imax) PetscFree(a->imax); 677 if (a->solve_work) PetscFree(a->solve_work); 678 if (a->mult_work) PetscFree(a->mult_work); 679 PetscFree(a); 680 if (A->mapping) { 681 ierr = ISLocalToGlobalMappingDestroy(A->mapping); CHKERRQ(ierr); 682 } 683 PLogObjectDestroy(A); 684 PetscHeaderDestroy(A); 685 return 0; 686 } 687 688 #undef __FUNCTION__ 689 #define __FUNCTION__ "MatSetOption_SeqBAIJ" 690 static int MatSetOption_SeqBAIJ(Mat A,MatOption op) 691 { 692 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 693 if (op == MAT_ROW_ORIENTED) a->roworiented = 1; 694 else if (op == MAT_COLUMN_ORIENTED) a->roworiented = 0; 695 else if (op == MAT_COLUMNS_SORTED) a->sorted = 1; 696 else if (op == MAT_COLUMNS_UNSORTED) a->sorted = 0; 697 else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 698 else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 699 else if (op == MAT_ROWS_SORTED || 700 op == MAT_ROWS_UNSORTED || 701 op == MAT_SYMMETRIC || 702 op == MAT_STRUCTURALLY_SYMMETRIC || 703 op == MAT_YES_NEW_DIAGONALS || 704 op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) 705 PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 706 else if (op == MAT_NO_NEW_DIAGONALS) 707 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:MAT_NO_NEW_DIAGONALS");} 708 else 709 {SETERRQ(PETSC_ERR_SUP,"MatSetOption_SeqBAIJ:unknown option");} 710 return 0; 711 } 712 713 714 /* -------------------------------------------------------*/ 715 /* Should check that shapes of vectors and matrices match */ 716 /* -------------------------------------------------------*/ 717 #include "pinclude/plapack.h" 718 719 #undef __FUNCTION__ 720 #define __FUNCTION__ "MatMult_SeqBAIJ_1" 721 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz) 722 { 723 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 724 register Scalar *x,*z,*v,sum; 725 int mbs=a->mbs,i,*idx,*ii,n; 726 727 VecGetArray_Fast(xx,x); 728 VecGetArray_Fast(zz,z); 729 730 idx = a->j; 731 v = a->a; 732 ii = a->i; 733 734 for ( i=0; i<mbs; i++ ) { 735 n = ii[1] - ii[0]; ii++; 736 sum = 0.0; 737 while (n--) sum += *v++ * x[*idx++]; 738 z[i] = sum; 739 } 740 VecRestoreArray_Fast(xx,x); 741 VecRestoreArray_Fast(zz,z); 742 PLogFlops(2*a->nz - a->m); 743 return 0; 744 } 745 746 #undef __FUNCTION__ 747 #define __FUNCTION__ "MatMult_SeqBAIJ_2" 748 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz) 749 { 750 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 751 register Scalar *x,*z,*v,*xb,sum1,sum2; 752 register Scalar x1,x2; 753 int mbs=a->mbs,i,*idx,*ii; 754 int j,n; 755 756 VecGetArray_Fast(xx,x); 757 VecGetArray_Fast(zz,z); 758 759 idx = a->j; 760 v = a->a; 761 ii = a->i; 762 763 for ( i=0; i<mbs; i++ ) { 764 n = ii[1] - ii[0]; ii++; 765 sum1 = 0.0; sum2 = 0.0; 766 for ( j=0; j<n; j++ ) { 767 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 768 sum1 += v[0]*x1 + v[2]*x2; 769 sum2 += v[1]*x1 + v[3]*x2; 770 v += 4; 771 } 772 z[0] = sum1; z[1] = sum2; 773 z += 2; 774 } 775 VecRestoreArray_Fast(xx,x); 776 VecRestoreArray_Fast(zz,z); 777 PLogFlops(4*a->nz - a->m); 778 return 0; 779 } 780 781 #undef __FUNCTION__ 782 #define __FUNCTION__ "MatMult_SeqBAIJ_3" 783 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz) 784 { 785 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 786 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 787 int mbs=a->mbs,i,*idx,*ii,j,n; 788 789 VecGetArray_Fast(xx,x); 790 VecGetArray_Fast(zz,z); 791 792 idx = a->j; 793 v = a->a; 794 ii = a->i; 795 796 for ( i=0; i<mbs; i++ ) { 797 n = ii[1] - ii[0]; ii++; 798 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 799 for ( j=0; j<n; j++ ) { 800 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 801 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 802 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 803 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 804 v += 9; 805 } 806 z[0] = sum1; z[1] = sum2; z[2] = sum3; 807 z += 3; 808 } 809 VecRestoreArray_Fast(xx,x); 810 VecRestoreArray_Fast(zz,z); 811 PLogFlops(18*a->nz - a->m); 812 return 0; 813 } 814 815 #undef __FUNCTION__ 816 #define __FUNCTION__ "MatMult_SeqBAIJ_4" 817 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz) 818 { 819 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 820 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4; 821 register Scalar x1,x2,x3,x4; 822 int mbs=a->mbs,i,*idx,*ii; 823 int j,n; 824 825 VecGetArray_Fast(xx,x); 826 VecGetArray_Fast(zz,z); 827 828 idx = a->j; 829 v = a->a; 830 ii = a->i; 831 832 for ( i=0; i<mbs; i++ ) { 833 n = ii[1] - ii[0]; ii++; 834 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 835 for ( j=0; j<n; j++ ) { 836 xb = x + 4*(*idx++); 837 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 838 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 839 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 840 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 841 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 842 v += 16; 843 } 844 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 845 z += 4; 846 } 847 VecRestoreArray_Fast(xx,x); 848 VecRestoreArray_Fast(zz,z); 849 PLogFlops(32*a->nz - a->m); 850 return 0; 851 } 852 853 #undef __FUNCTION__ 854 #define __FUNCTION__ "MatMult_SeqBAIJ_5" 855 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz) 856 { 857 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 858 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 859 register Scalar x1,x2,x3,x4,x5; 860 int mbs=a->mbs,i,*idx,*ii,j,n; 861 862 VecGetArray_Fast(xx,x); 863 VecGetArray_Fast(zz,z); 864 865 idx = a->j; 866 v = a->a; 867 ii = a->i; 868 869 for ( i=0; i<mbs; i++ ) { 870 n = ii[1] - ii[0]; ii++; 871 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 872 for ( j=0; j<n; j++ ) { 873 xb = x + 5*(*idx++); 874 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 875 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 876 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 877 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 878 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 879 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 880 v += 25; 881 } 882 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 883 z += 5; 884 } 885 VecRestoreArray_Fast(xx,x); 886 VecRestoreArray_Fast(zz,z); 887 PLogFlops(50*a->nz - a->m); 888 return 0; 889 } 890 891 #undef __FUNCTION__ 892 #define __FUNCTION__ "MatMult_SeqBAIJ_N" 893 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz) 894 { 895 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 896 register Scalar *x,*z,*v,*xb; 897 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2; 898 int _One = 1,ncols,k; 899 Scalar _DOne = 1.0,*work,*workt,_DZero = 0.0; 900 901 VecGetArray_Fast(xx,x); 902 VecGetArray_Fast(zz,z); 903 904 idx = a->j; 905 v = a->a; 906 ii = a->i; 907 908 909 if (!a->mult_work) { 910 k = PetscMax(a->m,a->n); 911 a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work); 912 } 913 work = a->mult_work; 914 for ( i=0; i<mbs; i++ ) { 915 n = ii[1] - ii[0]; ii++; 916 ncols = n*bs; 917 workt = work; 918 for ( j=0; j<n; j++ ) { 919 xb = x + bs*(*idx++); 920 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 921 workt += bs; 922 } 923 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); 924 v += n*bs2; 925 z += bs; 926 } 927 VecRestoreArray_Fast(xx,x); 928 VecRestoreArray_Fast(zz,z); 929 PLogFlops(2*a->nz*bs2 - a->m); 930 return 0; 931 } 932 933 #undef __FUNCTION__ 934 #define __FUNCTION__ "MatMultAdd_SeqBAIJ_1" 935 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 936 { 937 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 938 register Scalar *x,*y,*z,*v,sum; 939 int mbs=a->mbs,i,*idx,*ii,n; 940 941 VecGetArray_Fast(xx,x); 942 VecGetArray_Fast(yy,y); 943 VecGetArray_Fast(zz,z); 944 945 idx = a->j; 946 v = a->a; 947 ii = a->i; 948 949 for ( i=0; i<mbs; i++ ) { 950 n = ii[1] - ii[0]; ii++; 951 sum = y[i]; 952 while (n--) sum += *v++ * x[*idx++]; 953 z[i] = sum; 954 } 955 VecRestoreArray_Fast(xx,x); 956 VecRestoreArray_Fast(yy,y); 957 VecRestoreArray_Fast(zz,z); 958 PLogFlops(2*a->nz); 959 return 0; 960 } 961 962 #undef __FUNCTION__ 963 #define __FUNCTION__ "MatMultAdd_SeqBAIJ_2" 964 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 965 { 966 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 967 register Scalar *x,*y,*z,*v,*xb,sum1,sum2; 968 register Scalar x1,x2; 969 int mbs=a->mbs,i,*idx,*ii; 970 int j,n; 971 972 VecGetArray_Fast(xx,x); 973 VecGetArray_Fast(yy,y); 974 VecGetArray_Fast(zz,z); 975 976 idx = a->j; 977 v = a->a; 978 ii = a->i; 979 980 for ( i=0; i<mbs; i++ ) { 981 n = ii[1] - ii[0]; ii++; 982 sum1 = y[0]; sum2 = y[1]; 983 for ( j=0; j<n; j++ ) { 984 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 985 sum1 += v[0]*x1 + v[2]*x2; 986 sum2 += v[1]*x1 + v[3]*x2; 987 v += 4; 988 } 989 z[0] = sum1; z[1] = sum2; 990 z += 2; y += 2; 991 } 992 VecRestoreArray_Fast(xx,x); 993 VecRestoreArray_Fast(yy,y); 994 VecRestoreArray_Fast(zz,z); 995 PLogFlops(4*a->nz); 996 return 0; 997 } 998 999 #undef __FUNCTION__ 1000 #define __FUNCTION__ "MatMultAdd_SeqBAIJ_3" 1001 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 1002 { 1003 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1004 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 1005 int mbs=a->mbs,i,*idx,*ii,j,n; 1006 1007 VecGetArray_Fast(xx,x); 1008 VecGetArray_Fast(yy,y); 1009 VecGetArray_Fast(zz,z); 1010 1011 idx = a->j; 1012 v = a->a; 1013 ii = a->i; 1014 1015 for ( i=0; i<mbs; i++ ) { 1016 n = ii[1] - ii[0]; ii++; 1017 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; 1018 for ( j=0; j<n; j++ ) { 1019 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1020 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 1021 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 1022 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 1023 v += 9; 1024 } 1025 z[0] = sum1; z[1] = sum2; z[2] = sum3; 1026 z += 3; y += 3; 1027 } 1028 VecRestoreArray_Fast(xx,x); 1029 VecRestoreArray_Fast(yy,y); 1030 VecRestoreArray_Fast(zz,z); 1031 PLogFlops(18*a->nz); 1032 return 0; 1033 } 1034 1035 #undef __FUNCTION__ 1036 #define __FUNCTION__ "MatMultAdd_SeqBAIJ_4" 1037 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 1038 { 1039 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1040 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4; 1041 register Scalar x1,x2,x3,x4; 1042 int mbs=a->mbs,i,*idx,*ii; 1043 int j,n; 1044 1045 VecGetArray_Fast(xx,x); 1046 VecGetArray_Fast(yy,y); 1047 VecGetArray_Fast(zz,z); 1048 1049 idx = a->j; 1050 v = a->a; 1051 ii = a->i; 1052 1053 for ( i=0; i<mbs; i++ ) { 1054 n = ii[1] - ii[0]; ii++; 1055 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; 1056 for ( j=0; j<n; j++ ) { 1057 xb = x + 4*(*idx++); 1058 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1059 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 1060 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 1061 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 1062 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 1063 v += 16; 1064 } 1065 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 1066 z += 4; y += 4; 1067 } 1068 VecRestoreArray_Fast(xx,x); 1069 VecRestoreArray_Fast(yy,y); 1070 VecRestoreArray_Fast(zz,z); 1071 PLogFlops(32*a->nz); 1072 return 0; 1073 } 1074 1075 #undef __FUNCTION__ 1076 #define __FUNCTION__ "MatMultAdd_SeqBAIJ_5" 1077 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 1078 { 1079 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1080 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 1081 register Scalar x1,x2,x3,x4,x5; 1082 int mbs=a->mbs,i,*idx,*ii,j,n; 1083 1084 VecGetArray_Fast(xx,x); 1085 VecGetArray_Fast(yy,y); 1086 VecGetArray_Fast(zz,z); 1087 1088 idx = a->j; 1089 v = a->a; 1090 ii = a->i; 1091 1092 for ( i=0; i<mbs; i++ ) { 1093 n = ii[1] - ii[0]; ii++; 1094 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; 1095 for ( j=0; j<n; j++ ) { 1096 xb = x + 5*(*idx++); 1097 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 1098 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1099 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1100 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1101 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1102 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 1103 v += 25; 1104 } 1105 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 1106 z += 5; y += 5; 1107 } 1108 VecRestoreArray_Fast(xx,x); 1109 VecRestoreArray_Fast(yy,y); 1110 VecRestoreArray_Fast(zz,z); 1111 PLogFlops(50*a->nz); 1112 return 0; 1113 } 1114 1115 #undef __FUNCTION__ 1116 #define __FUNCTION__ "MatMultAdd_SeqBAIJ_N" 1117 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 1118 { 1119 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1120 register Scalar *x,*z,*v,*xb; 1121 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr; 1122 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1123 1124 if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1125 1126 VecGetArray_Fast(xx,x); 1127 VecGetArray_Fast(zz,z); 1128 1129 idx = a->j; 1130 v = a->a; 1131 ii = a->i; 1132 1133 1134 if (!a->mult_work) { 1135 k = PetscMax(a->m,a->n); 1136 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work); 1137 } 1138 work = a->mult_work; 1139 for ( i=0; i<mbs; i++ ) { 1140 n = ii[1] - ii[0]; ii++; 1141 ncols = n*bs; 1142 workt = work; 1143 for ( j=0; j<n; j++ ) { 1144 xb = x + bs*(*idx++); 1145 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1146 workt += bs; 1147 } 1148 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); 1149 v += n*bs2; 1150 z += bs; 1151 } 1152 VecRestoreArray_Fast(xx,x); 1153 VecRestoreArray_Fast(zz,z); 1154 PLogFlops(2*a->nz*bs2 ); 1155 return 0; 1156 } 1157 1158 #undef __FUNCTION__ 1159 #define __FUNCTION__ "MatMultTrans_SeqBAIJ" 1160 static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz) 1161 { 1162 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1163 Scalar *xg,*zg,*zb; 1164 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1165 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1166 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1167 1168 1169 VecGetArray_Fast(xx,xg); x = xg; 1170 VecGetArray_Fast(zz,zg); z = zg; 1171 PetscMemzero(z,N*sizeof(Scalar)); 1172 1173 idx = a->j; 1174 v = a->a; 1175 ii = a->i; 1176 1177 switch (bs) { 1178 case 1: 1179 for ( i=0; i<mbs; i++ ) { 1180 n = ii[1] - ii[0]; ii++; 1181 xb = x + i; x1 = xb[0]; 1182 ib = idx + ai[i]; 1183 for ( j=0; j<n; j++ ) { 1184 rval = ib[j]; 1185 z[rval] += *v++ * x1; 1186 } 1187 } 1188 break; 1189 case 2: 1190 for ( i=0; i<mbs; i++ ) { 1191 n = ii[1] - ii[0]; ii++; 1192 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1193 ib = idx + ai[i]; 1194 for ( j=0; j<n; j++ ) { 1195 rval = ib[j]*2; 1196 z[rval++] += v[0]*x1 + v[1]*x2; 1197 z[rval++] += v[2]*x1 + v[3]*x2; 1198 v += 4; 1199 } 1200 } 1201 break; 1202 case 3: 1203 for ( i=0; i<mbs; i++ ) { 1204 n = ii[1] - ii[0]; ii++; 1205 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1206 ib = idx + ai[i]; 1207 for ( j=0; j<n; j++ ) { 1208 rval = ib[j]*3; 1209 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1210 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1211 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1212 v += 9; 1213 } 1214 } 1215 break; 1216 case 4: 1217 for ( i=0; i<mbs; i++ ) { 1218 n = ii[1] - ii[0]; ii++; 1219 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1220 ib = idx + ai[i]; 1221 for ( j=0; j<n; j++ ) { 1222 rval = ib[j]*4; 1223 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1224 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1225 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1226 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1227 v += 16; 1228 } 1229 } 1230 break; 1231 case 5: 1232 for ( i=0; i<mbs; i++ ) { 1233 n = ii[1] - ii[0]; ii++; 1234 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1235 x4 = xb[3]; x5 = xb[4]; 1236 ib = idx + ai[i]; 1237 for ( j=0; j<n; j++ ) { 1238 rval = ib[j]*5; 1239 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1240 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1241 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1242 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1243 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1244 v += 25; 1245 } 1246 } 1247 break; 1248 /* block sizes larger then 5 by 5 are handled by BLAS */ 1249 default: { 1250 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1251 if (!a->mult_work) { 1252 k = PetscMax(a->m,a->n); 1253 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1254 CHKPTRQ(a->mult_work); 1255 } 1256 work = a->mult_work; 1257 for ( i=0; i<mbs; i++ ) { 1258 n = ii[1] - ii[0]; ii++; 1259 ncols = n*bs; 1260 PetscMemzero(work,ncols*sizeof(Scalar)); 1261 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1262 v += n*bs2; 1263 x += bs; 1264 workt = work; 1265 for ( j=0; j<n; j++ ) { 1266 zb = z + bs*(*idx++); 1267 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1268 workt += bs; 1269 } 1270 } 1271 } 1272 } 1273 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1274 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1275 PLogFlops(2*a->nz*a->bs2 - a->n); 1276 return 0; 1277 } 1278 1279 #undef __FUNCTION__ 1280 #define __FUNCTION__ "MatMultTransAdd_SeqBAIJ" 1281 static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1282 { 1283 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1284 Scalar *xg,*zg,*zb; 1285 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1286 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1287 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1288 1289 1290 1291 VecGetArray_Fast(xx,xg); x = xg; 1292 VecGetArray_Fast(zz,zg); z = zg; 1293 1294 if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1295 else PetscMemzero(z,N*sizeof(Scalar)); 1296 1297 1298 idx = a->j; 1299 v = a->a; 1300 ii = a->i; 1301 1302 switch (bs) { 1303 case 1: 1304 for ( i=0; i<mbs; i++ ) { 1305 n = ii[1] - ii[0]; ii++; 1306 xb = x + i; x1 = xb[0]; 1307 ib = idx + ai[i]; 1308 for ( j=0; j<n; j++ ) { 1309 rval = ib[j]; 1310 z[rval] += *v++ * x1; 1311 } 1312 } 1313 break; 1314 case 2: 1315 for ( i=0; i<mbs; i++ ) { 1316 n = ii[1] - ii[0]; ii++; 1317 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1318 ib = idx + ai[i]; 1319 for ( j=0; j<n; j++ ) { 1320 rval = ib[j]*2; 1321 z[rval++] += v[0]*x1 + v[1]*x2; 1322 z[rval++] += v[2]*x1 + v[3]*x2; 1323 v += 4; 1324 } 1325 } 1326 break; 1327 case 3: 1328 for ( i=0; i<mbs; i++ ) { 1329 n = ii[1] - ii[0]; ii++; 1330 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1331 ib = idx + ai[i]; 1332 for ( j=0; j<n; j++ ) { 1333 rval = ib[j]*3; 1334 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1335 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1336 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1337 v += 9; 1338 } 1339 } 1340 break; 1341 case 4: 1342 for ( i=0; i<mbs; i++ ) { 1343 n = ii[1] - ii[0]; ii++; 1344 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1345 ib = idx + ai[i]; 1346 for ( j=0; j<n; j++ ) { 1347 rval = ib[j]*4; 1348 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1349 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1350 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1351 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1352 v += 16; 1353 } 1354 } 1355 break; 1356 case 5: 1357 for ( i=0; i<mbs; i++ ) { 1358 n = ii[1] - ii[0]; ii++; 1359 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1360 x4 = xb[3]; x5 = xb[4]; 1361 ib = idx + ai[i]; 1362 for ( j=0; j<n; j++ ) { 1363 rval = ib[j]*5; 1364 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1365 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1366 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1367 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1368 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1369 v += 25; 1370 } 1371 } 1372 break; 1373 /* block sizes larger then 5 by 5 are handled by BLAS */ 1374 default: { 1375 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1376 if (!a->mult_work) { 1377 k = PetscMax(a->m,a->n); 1378 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1379 CHKPTRQ(a->mult_work); 1380 } 1381 work = a->mult_work; 1382 for ( i=0; i<mbs; i++ ) { 1383 n = ii[1] - ii[0]; ii++; 1384 ncols = n*bs; 1385 PetscMemzero(work,ncols*sizeof(Scalar)); 1386 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1387 v += n*bs2; 1388 x += bs; 1389 workt = work; 1390 for ( j=0; j<n; j++ ) { 1391 zb = z + bs*(*idx++); 1392 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1393 workt += bs; 1394 } 1395 } 1396 } 1397 } 1398 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1399 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1400 PLogFlops(2*a->nz*a->bs2); 1401 return 0; 1402 } 1403 1404 #undef __FUNCTION__ 1405 #define __FUNCTION__ "MatGetInfo_SeqBAIJ" 1406 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info) 1407 { 1408 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1409 1410 info->rows_global = (double)a->m; 1411 info->columns_global = (double)a->n; 1412 info->rows_local = (double)a->m; 1413 info->columns_local = (double)a->n; 1414 info->block_size = a->bs2; 1415 info->nz_allocated = a->maxnz; 1416 info->nz_used = a->bs2*a->nz; 1417 info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 1418 /* if (info->nz_unneeded != A->info.nz_unneeded) 1419 printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */ 1420 info->assemblies = A->num_ass; 1421 info->mallocs = a->reallocs; 1422 info->memory = A->mem; 1423 if (A->factor) { 1424 info->fill_ratio_given = A->info.fill_ratio_given; 1425 info->fill_ratio_needed = A->info.fill_ratio_needed; 1426 info->factor_mallocs = A->info.factor_mallocs; 1427 } else { 1428 info->fill_ratio_given = 0; 1429 info->fill_ratio_needed = 0; 1430 info->factor_mallocs = 0; 1431 } 1432 return 0; 1433 } 1434 1435 #undef __FUNCTION__ 1436 #define __FUNCTION__ "MatEqual_SeqBAIJ" 1437 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 1438 { 1439 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 1440 1441 if (B->type !=MATSEQBAIJ)SETERRQ(1,"MatEqual_SeqBAIJ:Matrices must be same type"); 1442 1443 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1444 if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 1445 (a->nz != b->nz)) { 1446 *flg = PETSC_FALSE; return 0; 1447 } 1448 1449 /* if the a->i are the same */ 1450 if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 1451 *flg = PETSC_FALSE; return 0; 1452 } 1453 1454 /* if a->j are the same */ 1455 if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 1456 *flg = PETSC_FALSE; return 0; 1457 } 1458 1459 /* if a->a are the same */ 1460 if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 1461 *flg = PETSC_FALSE; return 0; 1462 } 1463 *flg = PETSC_TRUE; 1464 return 0; 1465 1466 } 1467 1468 #undef __FUNCTION__ 1469 #define __FUNCTION__ "MatGetDiagonal_SeqBAIJ" 1470 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 1471 { 1472 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1473 int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 1474 Scalar *x, zero = 0.0,*aa,*aa_j; 1475 1476 bs = a->bs; 1477 aa = a->a; 1478 ai = a->i; 1479 aj = a->j; 1480 ambs = a->mbs; 1481 bs2 = a->bs2; 1482 1483 VecSet(&zero,v); 1484 VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n); 1485 if (n != a->m) SETERRQ(1,"MatGetDiagonal_SeqBAIJ:Nonconforming matrix and vector"); 1486 for ( i=0; i<ambs; i++ ) { 1487 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1488 if (aj[j] == i) { 1489 row = i*bs; 1490 aa_j = aa+j*bs2; 1491 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 1492 break; 1493 } 1494 } 1495 } 1496 return 0; 1497 } 1498 1499 #undef __FUNCTION__ 1500 #define __FUNCTION__ "MatDiagonalScale_SeqBAIJ" 1501 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 1502 { 1503 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1504 Scalar *l,*r,x,*v,*aa,*li,*ri; 1505 int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 1506 1507 ai = a->i; 1508 aj = a->j; 1509 aa = a->a; 1510 m = a->m; 1511 n = a->n; 1512 bs = a->bs; 1513 mbs = a->mbs; 1514 bs2 = a->bs2; 1515 if (ll) { 1516 VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm); 1517 if (lm != m) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Left scaling vector wrong length"); 1518 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1519 M = ai[i+1] - ai[i]; 1520 li = l + i*bs; 1521 v = aa + bs2*ai[i]; 1522 for ( j=0; j<M; j++ ) { /* for each block */ 1523 for ( k=0; k<bs2; k++ ) { 1524 (*v++) *= li[k%bs]; 1525 } 1526 } 1527 } 1528 } 1529 1530 if (rr) { 1531 VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn); 1532 if (rn != n) SETERRQ(1,"MatDiagonalScale_SeqBAIJ:Right scaling vector wrong length"); 1533 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1534 M = ai[i+1] - ai[i]; 1535 v = aa + bs2*ai[i]; 1536 for ( j=0; j<M; j++ ) { /* for each block */ 1537 ri = r + bs*aj[ai[i]+j]; 1538 for ( k=0; k<bs; k++ ) { 1539 x = ri[k]; 1540 for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 1541 } 1542 } 1543 } 1544 } 1545 return 0; 1546 } 1547 1548 1549 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 1550 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 1551 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1552 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*); 1553 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**); 1554 1555 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 1556 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 1557 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 1558 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 1559 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 1560 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 1561 1562 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 1563 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 1564 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 1565 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 1566 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 1567 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 1568 1569 #undef __FUNCTION__ 1570 #define __FUNCTION__ "MatNorm_SeqBAIJ" 1571 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 1572 { 1573 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1574 Scalar *v = a->a; 1575 double sum = 0.0; 1576 int i,nz=a->nz,bs2=a->bs2; 1577 1578 if (type == NORM_FROBENIUS) { 1579 for (i=0; i< bs2*nz; i++ ) { 1580 #if defined(PETSC_COMPLEX) 1581 sum += real(conj(*v)*(*v)); v++; 1582 #else 1583 sum += (*v)*(*v); v++; 1584 #endif 1585 } 1586 *norm = sqrt(sum); 1587 } 1588 else { 1589 SETERRQ(PETSC_ERR_SUP,"MatNorm_SeqBAIJ:No support for this norm yet"); 1590 } 1591 return 0; 1592 } 1593 1594 /* 1595 note: This can only work for identity for row and col. It would 1596 be good to check this and otherwise generate an error. 1597 */ 1598 #undef __FUNCTION__ 1599 #define __FUNCTION__ "MatILUFactor_SeqBAIJ" 1600 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 1601 { 1602 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1603 Mat outA; 1604 int ierr; 1605 1606 if (fill != 0) SETERRQ(1,"MatILUFactor_SeqBAIJ:Only fill=0 supported"); 1607 1608 outA = inA; 1609 inA->factor = FACTOR_LU; 1610 a->row = row; 1611 a->col = col; 1612 1613 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1614 1615 if (!a->diag) { 1616 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 1617 } 1618 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 1619 return 0; 1620 } 1621 1622 #undef __FUNCTION__ 1623 #define __FUNCTION__ "MatScale_SeqBAIJ" 1624 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 1625 { 1626 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1627 int one = 1, totalnz = a->bs2*a->nz; 1628 BLscal_( &totalnz, alpha, a->a, &one ); 1629 PLogFlops(totalnz); 1630 return 0; 1631 } 1632 1633 #undef __FUNCTION__ 1634 #define __FUNCTION__ "MatGetValues_SeqBAIJ" 1635 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 1636 { 1637 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1638 int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 1639 int *ai = a->i, *ailen = a->ilen; 1640 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 1641 Scalar *ap, *aa = a->a, zero = 0.0; 1642 1643 for ( k=0; k<m; k++ ) { /* loop over rows */ 1644 row = im[k]; brow = row/bs; 1645 if (row < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative row"); 1646 if (row >= a->m) SETERRQ(1,"MatGetValues_SeqBAIJ:Row too large"); 1647 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1648 nrow = ailen[brow]; 1649 for ( l=0; l<n; l++ ) { /* loop over columns */ 1650 if (in[l] < 0) SETERRQ(1,"MatGetValues_SeqBAIJ:Negative column"); 1651 if (in[l] >= a->n) SETERRQ(1,"MatGetValues_SeqBAIJ:Column too large"); 1652 col = in[l] ; 1653 bcol = col/bs; 1654 cidx = col%bs; 1655 ridx = row%bs; 1656 high = nrow; 1657 low = 0; /* assume unsorted */ 1658 while (high-low > 5) { 1659 t = (low+high)/2; 1660 if (rp[t] > bcol) high = t; 1661 else low = t; 1662 } 1663 for ( i=low; i<high; i++ ) { 1664 if (rp[i] > bcol) break; 1665 if (rp[i] == bcol) { 1666 *v++ = ap[bs2*i+bs*cidx+ridx]; 1667 goto finished; 1668 } 1669 } 1670 *v++ = zero; 1671 finished:; 1672 } 1673 } 1674 return 0; 1675 } 1676 1677 #undef __FUNCTION__ 1678 #define __FUNCTION__ "MatGetBlockSize_SeqBAIJ" 1679 static int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs) 1680 { 1681 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data; 1682 *bs = baij->bs; 1683 return 0; 1684 } 1685 1686 /* idx should be of length atlease bs */ 1687 #undef __FUNCTION__ 1688 #define __FUNCTION__ "MatZeroRows_SeqBAIJ_Check_Block" 1689 static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg) 1690 { 1691 int i,row; 1692 row = idx[0]; 1693 if (row%bs!=0) { *flg = PETSC_FALSE; return 0; } 1694 1695 for ( i=1; i<bs; i++ ) { 1696 if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; } 1697 } 1698 *flg = PETSC_TRUE; 1699 return 0; 1700 } 1701 1702 #undef __FUNCTION__ 1703 #define __FUNCTION__ "MatZeroRows_SeqBAIJ" 1704 static int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag) 1705 { 1706 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1707 IS is_local; 1708 int ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2; 1709 PetscTruth flg; 1710 Scalar zero = 0.0,*aa; 1711 1712 /* Make a copy of the IS and sort it */ 1713 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 1714 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1715 ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr); 1716 ierr = ISSort(is_local); CHKERRQ(ierr); 1717 ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr); 1718 1719 i = 0; 1720 while (i < is_n) { 1721 if (rows[i]<0 || rows[i]>m) SETERRQ(1,"MatZeroRows_SeqBAIJ:row out of range"); 1722 flg = PETSC_FALSE; 1723 if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); } 1724 if (flg) { /* There exists a block of rows to be Zerowed */ 1725 baij->ilen[rows[i]/bs] = 0; 1726 i += bs; 1727 } else { /* Zero out only the requested row */ 1728 count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs; 1729 aa = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs); 1730 for ( j=0; j<count; j++ ) { 1731 aa[0] = zero; 1732 aa+=bs; 1733 } 1734 i++; 1735 } 1736 } 1737 if (diag) { 1738 for ( j=0; j<is_n; j++ ) { 1739 ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1740 } 1741 } 1742 ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr); 1743 ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr); 1744 ierr = ISDestroy(is_local); CHKERRQ(ierr); 1745 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1746 1747 return 0; 1748 } 1749 1750 #undef __FUNCTION__ 1751 #define __FUNCTION__ "MatPrintHelp_SeqBAIJ" 1752 int MatPrintHelp_SeqBAIJ(Mat A) 1753 { 1754 static int called = 0; 1755 MPI_Comm comm = A->comm; 1756 1757 if (called) return 0; else called = 1; 1758 PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n"); 1759 PetscPrintf(comm," -mat_block_size <block_size>\n"); 1760 return 0; 1761 } 1762 1763 /* -------------------------------------------------------------------*/ 1764 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 1765 MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 1766 MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N, 1767 MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ, 1768 MatSolve_SeqBAIJ_N,0, 1769 0,0, 1770 MatLUFactor_SeqBAIJ,0, 1771 0, 1772 MatTranspose_SeqBAIJ, 1773 MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 1774 MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 1775 0,MatAssemblyEnd_SeqBAIJ, 1776 0, 1777 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ, 1778 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 1779 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 1780 MatILUFactorSymbolic_SeqBAIJ,0, 1781 0,0,/* MatConvert_SeqBAIJ */ 0, 1782 MatConvertSameType_SeqBAIJ,0,0, 1783 MatILUFactor_SeqBAIJ,0,0, 1784 MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ, 1785 MatGetValues_SeqBAIJ,0, 1786 MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 1787 0,0,0,MatGetBlockSize_SeqBAIJ, 1788 MatGetRowIJ_SeqBAIJ, 1789 MatRestoreRowIJ_SeqBAIJ}; 1790 1791 #undef __FUNCTION__ 1792 #define __FUNCTION__ "MatCreateSeqBAIJ" 1793 /*@C 1794 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1795 compressed row) format. For good matrix assembly performance the 1796 user should preallocate the matrix storage by setting the parameter nz 1797 (or the array nzz). By setting these parameters accurately, performance 1798 during matrix assembly can be increased by more than a factor of 50. 1799 1800 Input Parameters: 1801 . comm - MPI communicator, set to MPI_COMM_SELF 1802 . bs - size of block 1803 . m - number of rows 1804 . n - number of columns 1805 . nz - number of block nonzeros per block row (same for all rows) 1806 . nzz - array containing the number of block nonzeros in the various block rows 1807 (possibly different for each block row) or PETSC_NULL 1808 1809 Output Parameter: 1810 . A - the matrix 1811 1812 Notes: 1813 The block AIJ format is fully compatible with standard Fortran 77 1814 storage. That is, the stored row and column indices can begin at 1815 either one (as in Fortran) or zero. See the users' manual for details. 1816 1817 Specify the preallocated storage with either nz or nnz (not both). 1818 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1819 allocation. For additional details, see the users manual chapter on 1820 matrices. 1821 1822 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1823 @*/ 1824 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 1825 { 1826 Mat B; 1827 Mat_SeqBAIJ *b; 1828 int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size; 1829 1830 MPI_Comm_size(comm,&size); 1831 if (size > 1) SETERRQ(1,"MatCreateSeqBAIJ:Comm must be of size 1"); 1832 1833 if (mbs*bs!=m || nbs*bs!=n) 1834 SETERRQ(1,"MatCreateSeqBAIJ:Number rows, cols must be divisible by blocksize"); 1835 1836 *A = 0; 1837 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 1838 PLogObjectCreate(B); 1839 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 1840 PetscMemzero(b,sizeof(Mat_SeqBAIJ)); 1841 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 1842 ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr); 1843 if (!flg) { 1844 switch (bs) { 1845 case 1: 1846 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1847 B->ops.solve = MatSolve_SeqBAIJ_1; 1848 B->ops.mult = MatMult_SeqBAIJ_1; 1849 B->ops.multadd = MatMultAdd_SeqBAIJ_1; 1850 break; 1851 case 2: 1852 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1853 B->ops.solve = MatSolve_SeqBAIJ_2; 1854 B->ops.mult = MatMult_SeqBAIJ_2; 1855 B->ops.multadd = MatMultAdd_SeqBAIJ_2; 1856 break; 1857 case 3: 1858 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1859 B->ops.solve = MatSolve_SeqBAIJ_3; 1860 B->ops.mult = MatMult_SeqBAIJ_3; 1861 B->ops.multadd = MatMultAdd_SeqBAIJ_3; 1862 break; 1863 case 4: 1864 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1865 B->ops.solve = MatSolve_SeqBAIJ_4; 1866 B->ops.mult = MatMult_SeqBAIJ_4; 1867 B->ops.multadd = MatMultAdd_SeqBAIJ_4; 1868 break; 1869 case 5: 1870 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1871 B->ops.solve = MatSolve_SeqBAIJ_5; 1872 B->ops.mult = MatMult_SeqBAIJ_5; 1873 B->ops.multadd = MatMultAdd_SeqBAIJ_5; 1874 break; 1875 } 1876 } 1877 B->destroy = MatDestroy_SeqBAIJ; 1878 B->view = MatView_SeqBAIJ; 1879 B->factor = 0; 1880 B->lupivotthreshold = 1.0; 1881 B->mapping = 0; 1882 b->row = 0; 1883 b->col = 0; 1884 b->reallocs = 0; 1885 1886 b->m = m; B->m = m; B->M = m; 1887 b->n = n; B->n = n; B->N = n; 1888 b->mbs = mbs; 1889 b->nbs = nbs; 1890 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 1891 if (nnz == PETSC_NULL) { 1892 if (nz == PETSC_DEFAULT) nz = 5; 1893 else if (nz <= 0) nz = 1; 1894 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 1895 nz = nz*mbs; 1896 } 1897 else { 1898 nz = 0; 1899 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 1900 } 1901 1902 /* allocate the matrix space */ 1903 len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 1904 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 1905 PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 1906 b->j = (int *) (b->a + nz*bs2); 1907 PetscMemzero(b->j,nz*sizeof(int)); 1908 b->i = b->j + nz; 1909 b->singlemalloc = 1; 1910 1911 b->i[0] = 0; 1912 for (i=1; i<mbs+1; i++) { 1913 b->i[i] = b->i[i-1] + b->imax[i-1]; 1914 } 1915 1916 /* b->ilen will count nonzeros in each block row so far. */ 1917 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 1918 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1919 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 1920 1921 b->bs = bs; 1922 b->bs2 = bs2; 1923 b->mbs = mbs; 1924 b->nz = 0; 1925 b->maxnz = nz*bs2; 1926 b->sorted = 0; 1927 b->roworiented = 1; 1928 b->nonew = 0; 1929 b->diag = 0; 1930 b->solve_work = 0; 1931 b->mult_work = 0; 1932 b->spptr = 0; 1933 B->info.nz_unneeded = (double)b->maxnz; 1934 1935 *A = B; 1936 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 1937 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 1938 return 0; 1939 } 1940 1941 #undef __FUNCTION__ 1942 #define __FUNCTION__ "MatConvertSameType_SeqBAIJ" 1943 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 1944 { 1945 Mat C; 1946 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 1947 int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1948 1949 if (a->i[mbs] != nz) SETERRQ(1,"MatConvertSameType_SeqBAIJ:Corrupt matrix"); 1950 1951 *B = 0; 1952 PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 1953 PLogObjectCreate(C); 1954 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 1955 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 1956 C->destroy = MatDestroy_SeqBAIJ; 1957 C->view = MatView_SeqBAIJ; 1958 C->factor = A->factor; 1959 c->row = 0; 1960 c->col = 0; 1961 C->assembled = PETSC_TRUE; 1962 1963 c->m = C->m = a->m; 1964 c->n = C->n = a->n; 1965 C->M = a->m; 1966 C->N = a->n; 1967 1968 c->bs = a->bs; 1969 c->bs2 = a->bs2; 1970 c->mbs = a->mbs; 1971 c->nbs = a->nbs; 1972 1973 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 1974 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 1975 for ( i=0; i<mbs; i++ ) { 1976 c->imax[i] = a->imax[i]; 1977 c->ilen[i] = a->ilen[i]; 1978 } 1979 1980 /* allocate the matrix space */ 1981 c->singlemalloc = 1; 1982 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 1983 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 1984 c->j = (int *) (c->a + nz*bs2); 1985 c->i = c->j + nz; 1986 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 1987 if (mbs > 0) { 1988 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 1989 if (cpvalues == COPY_VALUES) { 1990 PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 1991 } 1992 } 1993 1994 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1995 c->sorted = a->sorted; 1996 c->roworiented = a->roworiented; 1997 c->nonew = a->nonew; 1998 1999 if (a->diag) { 2000 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 2001 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 2002 for ( i=0; i<mbs; i++ ) { 2003 c->diag[i] = a->diag[i]; 2004 } 2005 } 2006 else c->diag = 0; 2007 c->nz = a->nz; 2008 c->maxnz = a->maxnz; 2009 c->solve_work = 0; 2010 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 2011 c->mult_work = 0; 2012 *B = C; 2013 return 0; 2014 } 2015 2016 #undef __FUNCTION__ 2017 #define __FUNCTION__ "MatLoad_SeqBAIJ" 2018 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 2019 { 2020 Mat_SeqBAIJ *a; 2021 Mat B; 2022 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 2023 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2024 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 2025 int *masked, nmask,tmp,bs2,ishift; 2026 Scalar *aa; 2027 MPI_Comm comm = ((PetscObject) viewer)->comm; 2028 2029 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2030 bs2 = bs*bs; 2031 2032 MPI_Comm_size(comm,&size); 2033 if (size > 1) SETERRQ(1,"MatLoad_SeqBAIJ:view must have one processor"); 2034 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2035 ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 2036 if (header[0] != MAT_COOKIE) SETERRQ(1,"MatLoad_SeqBAIJ:not Mat object"); 2037 M = header[1]; N = header[2]; nz = header[3]; 2038 2039 if (M != N) SETERRQ(1,"MatLoad_SeqBAIJ:Can only do square matrices"); 2040 2041 /* 2042 This code adds extra rows to make sure the number of rows is 2043 divisible by the blocksize 2044 */ 2045 mbs = M/bs; 2046 extra_rows = bs - M + bs*(mbs); 2047 if (extra_rows == bs) extra_rows = 0; 2048 else mbs++; 2049 if (extra_rows) { 2050 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 2051 } 2052 2053 /* read in row lengths */ 2054 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 2055 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 2056 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 2057 2058 /* read in column indices */ 2059 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 2060 ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 2061 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 2062 2063 /* loop over row lengths determining block row lengths */ 2064 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 2065 PetscMemzero(browlengths,mbs*sizeof(int)); 2066 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 2067 PetscMemzero(mask,mbs*sizeof(int)); 2068 masked = mask + mbs; 2069 rowcount = 0; nzcount = 0; 2070 for ( i=0; i<mbs; i++ ) { 2071 nmask = 0; 2072 for ( j=0; j<bs; j++ ) { 2073 kmax = rowlengths[rowcount]; 2074 for ( k=0; k<kmax; k++ ) { 2075 tmp = jj[nzcount++]/bs; 2076 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2077 } 2078 rowcount++; 2079 } 2080 browlengths[i] += nmask; 2081 /* zero out the mask elements we set */ 2082 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2083 } 2084 2085 /* create our matrix */ 2086 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 2087 CHKERRQ(ierr); 2088 B = *A; 2089 a = (Mat_SeqBAIJ *) B->data; 2090 2091 /* set matrix "i" values */ 2092 a->i[0] = 0; 2093 for ( i=1; i<= mbs; i++ ) { 2094 a->i[i] = a->i[i-1] + browlengths[i-1]; 2095 a->ilen[i-1] = browlengths[i-1]; 2096 } 2097 a->nz = 0; 2098 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 2099 2100 /* read in nonzero values */ 2101 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 2102 ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 2103 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 2104 2105 /* set "a" and "j" values into matrix */ 2106 nzcount = 0; jcount = 0; 2107 for ( i=0; i<mbs; i++ ) { 2108 nzcountb = nzcount; 2109 nmask = 0; 2110 for ( j=0; j<bs; j++ ) { 2111 kmax = rowlengths[i*bs+j]; 2112 for ( k=0; k<kmax; k++ ) { 2113 tmp = jj[nzcount++]/bs; 2114 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2115 } 2116 rowcount++; 2117 } 2118 /* sort the masked values */ 2119 PetscSortInt(nmask,masked); 2120 2121 /* set "j" values into matrix */ 2122 maskcount = 1; 2123 for ( j=0; j<nmask; j++ ) { 2124 a->j[jcount++] = masked[j]; 2125 mask[masked[j]] = maskcount++; 2126 } 2127 /* set "a" values into matrix */ 2128 ishift = bs2*a->i[i]; 2129 for ( j=0; j<bs; j++ ) { 2130 kmax = rowlengths[i*bs+j]; 2131 for ( k=0; k<kmax; k++ ) { 2132 tmp = jj[nzcountb]/bs ; 2133 block = mask[tmp] - 1; 2134 point = jj[nzcountb] - bs*tmp; 2135 idx = ishift + bs2*block + j + bs*point; 2136 a->a[idx] = aa[nzcountb++]; 2137 } 2138 } 2139 /* zero out the mask elements we set */ 2140 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2141 } 2142 if (jcount != a->nz) SETERRQ(1,"MatLoad_SeqBAIJ:Error bad binary matrix"); 2143 2144 PetscFree(rowlengths); 2145 PetscFree(browlengths); 2146 PetscFree(aa); 2147 PetscFree(jj); 2148 PetscFree(mask); 2149 2150 B->assembled = PETSC_TRUE; 2151 2152 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 2153 if (flg) { 2154 Viewer tviewer; 2155 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2156 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO,0);CHKERRQ(ierr); 2157 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2158 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2159 } 2160 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 2161 if (flg) { 2162 Viewer tviewer; 2163 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2164 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO_LONG,0);CHKERRQ(ierr); 2165 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2166 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2167 } 2168 ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 2169 if (flg) { 2170 Viewer tviewer; 2171 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2172 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2173 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2174 } 2175 ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 2176 if (flg) { 2177 Viewer tviewer; 2178 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2179 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_MATLAB,"M");CHKERRQ(ierr); 2180 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2181 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2182 } 2183 ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 2184 if (flg) { 2185 Viewer tviewer; 2186 ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 2187 ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 2188 ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 2189 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2190 } 2191 return 0; 2192 } 2193 2194 2195 2196