1 #ifndef lint 2 static char vcid[] = "$Id: baij.c,v 1.82 1997/01/06 20:25:20 balay Exp bsmith $"; 3 #endif 4 5 /* 6 Defines the basic matrix operations for the BAIJ (compressed row) 7 matrix storage format. 8 */ 9 #include "src/mat/impls/baij/seq/baij.h" 10 #include "src/vec/vecimpl.h" 11 #include "src/inline/spops.h" 12 #include "petsc.h" 13 14 15 /* 16 Adds diagonal pointers to sparse matrix structure. 17 */ 18 19 #undef __FUNC__ 20 #define __FUNC__ "MatMarkDiag_SeqBAIJ" 21 int MatMarkDiag_SeqBAIJ(Mat A) 22 { 23 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 24 int i,j, *diag, m = a->mbs; 25 26 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 27 PLogObjectMemory(A,(m+1)*sizeof(int)); 28 for ( i=0; i<m; i++ ) { 29 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 30 if (a->j[j] == i) { 31 diag[i] = j; 32 break; 33 } 34 } 35 } 36 a->diag = diag; 37 return 0; 38 } 39 40 #include "draw.h" 41 #include "pinclude/pviewer.h" 42 #include "sys.h" 43 44 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 45 46 #undef __FUNC__ 47 #define __FUNC__ "MatGetRowIJ_SeqBAIJ" 48 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 49 PetscTruth *done) 50 { 51 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 52 int ierr, n = a->mbs,i; 53 54 *nn = n; 55 if (!ia) return 0; 56 if (symmetric) { 57 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 58 } else if (oshift == 1) { 59 /* temporarily add 1 to i and j indices */ 60 int nz = a->i[n] + 1; 61 for ( i=0; i<nz; i++ ) a->j[i]++; 62 for ( i=0; i<n+1; i++ ) a->i[i]++; 63 *ia = a->i; *ja = a->j; 64 } else { 65 *ia = a->i; *ja = a->j; 66 } 67 68 return 0; 69 } 70 71 #undef __FUNC__ 72 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ" 73 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 74 PetscTruth *done) 75 { 76 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 77 int i,n = a->mbs; 78 79 if (!ia) return 0; 80 if (symmetric) { 81 PetscFree(*ia); 82 PetscFree(*ja); 83 } else if (oshift == 1) { 84 int nz = a->i[n]; 85 for ( i=0; i<nz; i++ ) a->j[i]--; 86 for ( i=0; i<n+1; i++ ) a->i[i]--; 87 } 88 return 0; 89 } 90 91 92 #undef __FUNC__ 93 #define __FUNC__ "MatView_SeqBAIJ_Binary" 94 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 95 { 96 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 97 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 98 Scalar *aa; 99 100 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 101 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 102 col_lens[0] = MAT_COOKIE; 103 104 col_lens[1] = a->m; 105 col_lens[2] = a->n; 106 col_lens[3] = a->nz*bs2; 107 108 /* store lengths of each row and write (including header) to file */ 109 count = 0; 110 for ( i=0; i<a->mbs; i++ ) { 111 for ( j=0; j<bs; j++ ) { 112 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 113 } 114 } 115 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 116 PetscFree(col_lens); 117 118 /* store column indices (zero start index) */ 119 jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj); 120 count = 0; 121 for ( i=0; i<a->mbs; i++ ) { 122 for ( j=0; j<bs; j++ ) { 123 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 124 for ( l=0; l<bs; l++ ) { 125 jj[count++] = bs*a->j[k] + l; 126 } 127 } 128 } 129 } 130 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr); 131 PetscFree(jj); 132 133 /* store nonzero values */ 134 aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa); 135 count = 0; 136 for ( i=0; i<a->mbs; i++ ) { 137 for ( j=0; j<bs; j++ ) { 138 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 139 for ( l=0; l<bs; l++ ) { 140 aa[count++] = a->a[bs2*k + l*bs + j]; 141 } 142 } 143 } 144 } 145 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 146 PetscFree(aa); 147 return 0; 148 } 149 150 #undef __FUNC__ 151 #define __FUNC__ "MatView_SeqBAIJ_ASCII" 152 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 153 { 154 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 155 int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 156 FILE *fd; 157 char *outputname; 158 159 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 160 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 161 ierr = ViewerGetFormat(viewer,&format); 162 if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) { 163 fprintf(fd," block size is %d\n",bs); 164 } 165 else if (format == VIEWER_FORMAT_ASCII_MATLAB) { 166 SETERRQ(1,0,"Matlab format not supported"); 167 } 168 else if (format == VIEWER_FORMAT_ASCII_COMMON) { 169 for ( i=0; i<a->mbs; i++ ) { 170 for ( j=0; j<bs; j++ ) { 171 fprintf(fd,"row %d:",i*bs+j); 172 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 173 for ( l=0; l<bs; l++ ) { 174 #if defined(PETSC_COMPLEX) 175 if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 176 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 177 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 178 else if (real(a->a[bs2*k + l*bs + j]) != 0.0) 179 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 180 #else 181 if (a->a[bs2*k + l*bs + j] != 0.0) 182 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 183 #endif 184 } 185 } 186 fprintf(fd,"\n"); 187 } 188 } 189 } 190 else { 191 for ( i=0; i<a->mbs; i++ ) { 192 for ( j=0; j<bs; j++ ) { 193 fprintf(fd,"row %d:",i*bs+j); 194 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 195 for ( l=0; l<bs; l++ ) { 196 #if defined(PETSC_COMPLEX) 197 if (imag(a->a[bs2*k + l*bs + j]) != 0.0) { 198 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 199 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 200 } 201 else { 202 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 203 } 204 #else 205 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 206 #endif 207 } 208 } 209 fprintf(fd,"\n"); 210 } 211 } 212 } 213 fflush(fd); 214 return 0; 215 } 216 217 #undef __FUNC__ 218 #define __FUNC__ "MatView_SeqBAIJ_Draw" 219 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer) 220 { 221 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 222 int row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2; 223 double xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r; 224 Scalar *aa; 225 Draw draw; 226 DrawButton button; 227 PetscTruth isnull; 228 229 ViewerDrawGetDraw(viewer,&draw); 230 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 231 232 xr = a->n; yr = a->m; h = yr/10.0; w = xr/10.0; 233 xr += w; yr += h; xl = -w; yl = -h; 234 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 235 /* loop over matrix elements drawing boxes */ 236 color = DRAW_BLUE; 237 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 238 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 239 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 240 x_l = a->j[j]*bs; x_r = x_l + 1.0; 241 aa = a->a + j*bs2; 242 for ( k=0; k<bs; k++ ) { 243 for ( l=0; l<bs; l++ ) { 244 if (PetscReal(*aa++) >= 0.) continue; 245 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 246 } 247 } 248 } 249 } 250 color = DRAW_CYAN; 251 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 252 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 253 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 254 x_l = a->j[j]*bs; x_r = x_l + 1.0; 255 aa = a->a + j*bs2; 256 for ( k=0; k<bs; k++ ) { 257 for ( l=0; l<bs; l++ ) { 258 if (PetscReal(*aa++) != 0.) continue; 259 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 260 } 261 } 262 } 263 } 264 265 color = DRAW_RED; 266 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 267 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 268 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 269 x_l = a->j[j]*bs; x_r = x_l + 1.0; 270 aa = a->a + j*bs2; 271 for ( k=0; k<bs; k++ ) { 272 for ( l=0; l<bs; l++ ) { 273 if (PetscReal(*aa++) <= 0.) continue; 274 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 275 } 276 } 277 } 278 } 279 280 DrawFlush(draw); 281 DrawGetPause(draw,&pause); 282 if (pause >= 0) { PetscSleep(pause); return 0;} 283 284 /* allow the matrix to zoom or shrink */ 285 ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 286 while (button != BUTTON_RIGHT) { 287 DrawClear(draw); 288 if (button == BUTTON_LEFT) scale = .5; 289 else if (button == BUTTON_CENTER) scale = 2.; 290 xl = scale*(xl + w - xc) + xc - w*scale; 291 xr = scale*(xr - w - xc) + xc + w*scale; 292 yl = scale*(yl + h - yc) + yc - h*scale; 293 yr = scale*(yr - h - yc) + yc + h*scale; 294 w *= scale; h *= scale; 295 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 296 color = DRAW_BLUE; 297 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 298 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 299 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 300 x_l = a->j[j]*bs; x_r = x_l + 1.0; 301 aa = a->a + j*bs2; 302 for ( k=0; k<bs; k++ ) { 303 for ( l=0; l<bs; l++ ) { 304 if (PetscReal(*aa++) >= 0.) continue; 305 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 306 } 307 } 308 } 309 } 310 color = DRAW_CYAN; 311 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 312 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 313 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 314 x_l = a->j[j]*bs; x_r = x_l + 1.0; 315 aa = a->a + j*bs2; 316 for ( k=0; k<bs; k++ ) { 317 for ( l=0; l<bs; l++ ) { 318 if (PetscReal(*aa++) != 0.) continue; 319 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 320 } 321 } 322 } 323 } 324 325 color = DRAW_RED; 326 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 327 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 328 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 329 x_l = a->j[j]*bs; x_r = x_l + 1.0; 330 aa = a->a + j*bs2; 331 for ( k=0; k<bs; k++ ) { 332 for ( l=0; l<bs; l++ ) { 333 if (PetscReal(*aa++) <= 0.) continue; 334 DrawRectangle(draw,x_l+k,y_l+l,x_r+k,y_r+l,color,color,color,color); 335 } 336 } 337 } 338 } 339 ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 340 } 341 return 0; 342 } 343 344 #undef __FUNC__ 345 #define __FUNC__ "MatView_SeqBAIJ" 346 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 347 { 348 Mat A = (Mat) obj; 349 ViewerType vtype; 350 int ierr; 351 352 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 353 if (vtype == MATLAB_VIEWER) { 354 SETERRQ(1,0,"Matlab viewer not supported"); 355 } 356 else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 357 return MatView_SeqBAIJ_ASCII(A,viewer); 358 } 359 else if (vtype == BINARY_FILE_VIEWER) { 360 return MatView_SeqBAIJ_Binary(A,viewer); 361 } 362 else if (vtype == DRAW_VIEWER) { 363 return MatView_SeqBAIJ_Draw(A,viewer); 364 } 365 return 0; 366 } 367 368 #define CHUNKSIZE 10 369 370 #undef __FUNC__ 371 #define __FUNC__ "MatSetValues_SeqBAIJ" 372 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 373 { 374 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 375 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 376 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 377 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 378 int ridx,cidx,bs2=a->bs2; 379 Scalar *ap,value,*aa=a->a,*bap; 380 381 for ( k=0; k<m; k++ ) { /* loop over added rows */ 382 row = im[k]; brow = row/bs; 383 #if defined(PETSC_BOPT_g) 384 if (row < 0) SETERRQ(1,0,"Negative row"); 385 if (row >= a->m) SETERRQ(1,0,"Row too large"); 386 #endif 387 rp = aj + ai[brow]; 388 ap = aa + bs2*ai[brow]; 389 rmax = imax[brow]; 390 nrow = ailen[brow]; 391 low = 0; 392 for ( l=0; l<n; l++ ) { /* loop over added columns */ 393 #if defined(PETSC_BOPT_g) 394 if (in[l] < 0) SETERRQ(1,0,"Negative column"); 395 if (in[l] >= a->n) SETERRQ(1,0,"Column too large"); 396 #endif 397 col = in[l]; bcol = col/bs; 398 ridx = row % bs; cidx = col % bs; 399 if (roworiented) { 400 value = *v++; 401 } 402 else { 403 value = v[k + l*m]; 404 } 405 if (!sorted) low = 0; high = nrow; 406 while (high-low > 7) { 407 t = (low+high)/2; 408 if (rp[t] > bcol) high = t; 409 else low = t; 410 } 411 for ( i=low; i<high; i++ ) { 412 if (rp[i] > bcol) break; 413 if (rp[i] == bcol) { 414 bap = ap + bs2*i + bs*cidx + ridx; 415 if (is == ADD_VALUES) *bap += value; 416 else *bap = value; 417 goto noinsert; 418 } 419 } 420 if (nonew) goto noinsert; 421 if (nrow >= rmax) { 422 /* there is no extra room in row, therefore enlarge */ 423 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 424 Scalar *new_a; 425 426 /* malloc new storage space */ 427 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 428 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 429 new_j = (int *) (new_a + bs2*new_nz); 430 new_i = new_j + new_nz; 431 432 /* copy over old data into new slots */ 433 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 434 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 435 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); 436 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 437 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, 438 len*sizeof(int)); 439 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); 440 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 441 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), 442 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); 443 /* free up old matrix storage */ 444 PetscFree(a->a); 445 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 446 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 447 a->singlemalloc = 1; 448 449 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 450 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 451 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 452 a->maxnz += bs2*CHUNKSIZE; 453 a->reallocs++; 454 a->nz++; 455 } 456 N = nrow++ - 1; 457 /* shift up all the later entries in this row */ 458 for ( ii=N; ii>=i; ii-- ) { 459 rp[ii+1] = rp[ii]; 460 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 461 } 462 if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 463 rp[i] = bcol; 464 ap[bs2*i + bs*cidx + ridx] = value; 465 noinsert:; 466 low = i; 467 } 468 ailen[brow] = nrow; 469 } 470 return 0; 471 } 472 473 #undef __FUNC__ 474 #define __FUNC__ "MatGetSize_SeqBAIJ" 475 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 476 { 477 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 478 *m = a->m; *n = a->n; 479 return 0; 480 } 481 482 #undef __FUNC__ 483 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ" 484 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 485 { 486 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 487 *m = 0; *n = a->m; 488 return 0; 489 } 490 491 #undef __FUNC__ 492 #define __FUNC__ "MatGetRow_SeqBAIJ" 493 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 494 { 495 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 496 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 497 Scalar *aa,*v_i,*aa_i; 498 499 bs = a->bs; 500 ai = a->i; 501 aj = a->j; 502 aa = a->a; 503 bs2 = a->bs2; 504 505 if (row < 0 || row >= a->m) SETERRQ(1,0,"Row out of range"); 506 507 bn = row/bs; /* Block number */ 508 bp = row % bs; /* Block Position */ 509 M = ai[bn+1] - ai[bn]; 510 *nz = bs*M; 511 512 if (v) { 513 *v = 0; 514 if (*nz) { 515 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 516 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 517 v_i = *v + i*bs; 518 aa_i = aa + bs2*(ai[bn] + i); 519 for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 520 } 521 } 522 } 523 524 if (idx) { 525 *idx = 0; 526 if (*nz) { 527 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 528 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 529 idx_i = *idx + i*bs; 530 itmp = bs*aj[ai[bn] + i]; 531 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 532 } 533 } 534 } 535 return 0; 536 } 537 538 #undef __FUNC__ 539 #define __FUNC__ "MatRestoreRow_SeqBAIJ" 540 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 541 { 542 if (idx) {if (*idx) PetscFree(*idx);} 543 if (v) {if (*v) PetscFree(*v);} 544 return 0; 545 } 546 547 #undef __FUNC__ 548 #define __FUNC__ "MatTranspose_SeqBAIJ" 549 static int MatTranspose_SeqBAIJ(Mat A,Mat *B) 550 { 551 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 552 Mat C; 553 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 554 int *rows,*cols,bs2=a->bs2; 555 Scalar *array=a->a; 556 557 if (B==PETSC_NULL && mbs!=nbs) 558 SETERRQ(1,0,"Square matrix only for in-place"); 559 col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 560 PetscMemzero(col,(1+nbs)*sizeof(int)); 561 562 for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 563 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 564 PetscFree(col); 565 rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 566 cols = rows + bs; 567 for ( i=0; i<mbs; i++ ) { 568 cols[0] = i*bs; 569 for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 570 len = ai[i+1] - ai[i]; 571 for ( j=0; j<len; j++ ) { 572 rows[0] = (*aj++)*bs; 573 for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 574 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 575 array += bs2; 576 } 577 } 578 PetscFree(rows); 579 580 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 581 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 582 583 if (B != PETSC_NULL) { 584 *B = C; 585 } else { 586 /* This isn't really an in-place transpose */ 587 PetscFree(a->a); 588 if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 589 if (a->diag) PetscFree(a->diag); 590 if (a->ilen) PetscFree(a->ilen); 591 if (a->imax) PetscFree(a->imax); 592 if (a->solve_work) PetscFree(a->solve_work); 593 PetscFree(a); 594 PetscMemcpy(A,C,sizeof(struct _Mat)); 595 PetscHeaderDestroy(C); 596 } 597 return 0; 598 } 599 600 601 #undef __FUNC__ 602 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ" 603 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 604 { 605 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 606 int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 607 int m = a->m,*ip, N, *ailen = a->ilen; 608 int mbs = a->mbs, bs2 = a->bs2; 609 Scalar *aa = a->a, *ap; 610 611 if (mode == MAT_FLUSH_ASSEMBLY) return 0; 612 613 for ( i=1; i<mbs; i++ ) { 614 /* move each row back by the amount of empty slots (fshift) before it*/ 615 fshift += imax[i-1] - ailen[i-1]; 616 if (fshift) { 617 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 618 N = ailen[i]; 619 for ( j=0; j<N; j++ ) { 620 ip[j-fshift] = ip[j]; 621 PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 622 } 623 } 624 ai[i] = ai[i-1] + ailen[i-1]; 625 } 626 if (mbs) { 627 fshift += imax[mbs-1] - ailen[mbs-1]; 628 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 629 } 630 /* reset ilen and imax for each row */ 631 for ( i=0; i<mbs; i++ ) { 632 ailen[i] = imax[i] = ai[i+1] - ai[i]; 633 } 634 a->nz = ai[mbs]; 635 636 /* diagonals may have moved, so kill the diagonal pointers */ 637 if (fshift && a->diag) { 638 PetscFree(a->diag); 639 PLogObjectMemory(A,-(m+1)*sizeof(int)); 640 a->diag = 0; 641 } 642 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 643 m,a->n,a->bs,fshift*bs2,a->nz*bs2); 644 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 645 a->reallocs); 646 a->reallocs = 0; 647 A->info.nz_unneeded = (double)fshift*bs2; 648 649 return 0; 650 } 651 652 #undef __FUNC__ 653 #define __FUNC__ "MatZeroEntries_SeqBAIJ" 654 static int MatZeroEntries_SeqBAIJ(Mat A) 655 { 656 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 657 PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar)); 658 return 0; 659 } 660 661 #undef __FUNC__ 662 #define __FUNC__ "MatDestroy_SeqBAIJ" 663 int MatDestroy_SeqBAIJ(PetscObject obj) 664 { 665 Mat A = (Mat) obj; 666 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 667 int ierr; 668 669 #if defined(PETSC_LOG) 670 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 671 #endif 672 PetscFree(a->a); 673 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 674 if (a->diag) PetscFree(a->diag); 675 if (a->ilen) PetscFree(a->ilen); 676 if (a->imax) PetscFree(a->imax); 677 if (a->solve_work) PetscFree(a->solve_work); 678 if (a->mult_work) PetscFree(a->mult_work); 679 PetscFree(a); 680 if (A->mapping) { 681 ierr = ISLocalToGlobalMappingDestroy(A->mapping); CHKERRQ(ierr); 682 } 683 PLogObjectDestroy(A); 684 PetscHeaderDestroy(A); 685 return 0; 686 } 687 688 #undef __FUNC__ 689 #define __FUNC__ "MatSetOption_SeqBAIJ" 690 static int MatSetOption_SeqBAIJ(Mat A,MatOption op) 691 { 692 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 693 if (op == MAT_ROW_ORIENTED) a->roworiented = 1; 694 else if (op == MAT_COLUMN_ORIENTED) a->roworiented = 0; 695 else if (op == MAT_COLUMNS_SORTED) a->sorted = 1; 696 else if (op == MAT_COLUMNS_UNSORTED) a->sorted = 0; 697 else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 698 else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 699 else if (op == MAT_ROWS_SORTED || 700 op == MAT_ROWS_UNSORTED || 701 op == MAT_SYMMETRIC || 702 op == MAT_STRUCTURALLY_SYMMETRIC || 703 op == MAT_YES_NEW_DIAGONALS || 704 op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) 705 PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 706 else if (op == MAT_NO_NEW_DIAGONALS) 707 {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");} 708 else 709 {SETERRQ(PETSC_ERR_SUP,0,"unknown option");} 710 return 0; 711 } 712 713 714 /* -------------------------------------------------------*/ 715 /* Should check that shapes of vectors and matrices match */ 716 /* -------------------------------------------------------*/ 717 #include "pinclude/plapack.h" 718 719 #undef __FUNC__ 720 #define __FUNC__ "MatMult_SeqBAIJ_1" 721 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz) 722 { 723 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 724 register Scalar *x,*z,*v,sum; 725 int mbs=a->mbs,i,*idx,*ii,n; 726 727 VecGetArray_Fast(xx,x); 728 VecGetArray_Fast(zz,z); 729 730 idx = a->j; 731 v = a->a; 732 ii = a->i; 733 734 for ( i=0; i<mbs; i++ ) { 735 n = ii[1] - ii[0]; ii++; 736 sum = 0.0; 737 while (n--) sum += *v++ * x[*idx++]; 738 z[i] = sum; 739 } 740 VecRestoreArray_Fast(xx,x); 741 VecRestoreArray_Fast(zz,z); 742 PLogFlops(2*a->nz - a->m); 743 return 0; 744 } 745 746 #undef __FUNC__ 747 #define __FUNC__ "MatMult_SeqBAIJ_2" 748 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz) 749 { 750 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 751 register Scalar *x,*z,*v,*xb,sum1,sum2; 752 register Scalar x1,x2; 753 int mbs=a->mbs,i,*idx,*ii; 754 int j,n; 755 756 VecGetArray_Fast(xx,x); 757 VecGetArray_Fast(zz,z); 758 759 idx = a->j; 760 v = a->a; 761 ii = a->i; 762 763 for ( i=0; i<mbs; i++ ) { 764 n = ii[1] - ii[0]; ii++; 765 sum1 = 0.0; sum2 = 0.0; 766 for ( j=0; j<n; j++ ) { 767 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 768 sum1 += v[0]*x1 + v[2]*x2; 769 sum2 += v[1]*x1 + v[3]*x2; 770 v += 4; 771 } 772 z[0] = sum1; z[1] = sum2; 773 z += 2; 774 } 775 VecRestoreArray_Fast(xx,x); 776 VecRestoreArray_Fast(zz,z); 777 PLogFlops(4*a->nz - a->m); 778 return 0; 779 } 780 781 #undef __FUNC__ 782 #define __FUNC__ "MatMult_SeqBAIJ_3" 783 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz) 784 { 785 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 786 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 787 int mbs=a->mbs,i,*idx,*ii,j,n; 788 789 VecGetArray_Fast(xx,x); 790 VecGetArray_Fast(zz,z); 791 792 idx = a->j; 793 v = a->a; 794 ii = a->i; 795 796 for ( i=0; i<mbs; i++ ) { 797 n = ii[1] - ii[0]; ii++; 798 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 799 for ( j=0; j<n; j++ ) { 800 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 801 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 802 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 803 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 804 v += 9; 805 } 806 z[0] = sum1; z[1] = sum2; z[2] = sum3; 807 z += 3; 808 } 809 VecRestoreArray_Fast(xx,x); 810 VecRestoreArray_Fast(zz,z); 811 PLogFlops(18*a->nz - a->m); 812 return 0; 813 } 814 815 #undef __FUNC__ 816 #define __FUNC__ "MatMult_SeqBAIJ_4" 817 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz) 818 { 819 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 820 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4; 821 register Scalar x1,x2,x3,x4; 822 int mbs=a->mbs,i,*idx,*ii; 823 int j,n; 824 825 VecGetArray_Fast(xx,x); 826 VecGetArray_Fast(zz,z); 827 828 idx = a->j; 829 v = a->a; 830 ii = a->i; 831 832 for ( i=0; i<mbs; i++ ) { 833 n = ii[1] - ii[0]; ii++; 834 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 835 for ( j=0; j<n; j++ ) { 836 xb = x + 4*(*idx++); 837 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 838 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 839 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 840 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 841 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 842 v += 16; 843 } 844 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 845 z += 4; 846 } 847 VecRestoreArray_Fast(xx,x); 848 VecRestoreArray_Fast(zz,z); 849 PLogFlops(32*a->nz - a->m); 850 return 0; 851 } 852 853 #undef __FUNC__ 854 #define __FUNC__ "MatMult_SeqBAIJ_5" 855 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz) 856 { 857 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 858 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 859 register Scalar x1,x2,x3,x4,x5; 860 int mbs=a->mbs,i,*idx,*ii,j,n; 861 862 VecGetArray_Fast(xx,x); 863 VecGetArray_Fast(zz,z); 864 865 idx = a->j; 866 v = a->a; 867 ii = a->i; 868 869 for ( i=0; i<mbs; i++ ) { 870 n = ii[1] - ii[0]; ii++; 871 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 872 for ( j=0; j<n; j++ ) { 873 xb = x + 5*(*idx++); 874 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 875 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 876 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 877 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 878 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 879 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 880 v += 25; 881 } 882 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 883 z += 5; 884 } 885 VecRestoreArray_Fast(xx,x); 886 VecRestoreArray_Fast(zz,z); 887 PLogFlops(50*a->nz - a->m); 888 return 0; 889 } 890 891 #undef __FUNC__ 892 #define __FUNC__ "MatMult_SeqBAIJ_N" 893 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz) 894 { 895 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 896 register Scalar *x,*z,*v,*xb; 897 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2; 898 int _One = 1,ncols,k; 899 Scalar _DOne = 1.0,*work,*workt,_DZero = 0.0; 900 901 VecGetArray_Fast(xx,x); 902 VecGetArray_Fast(zz,z); 903 904 idx = a->j; 905 v = a->a; 906 ii = a->i; 907 908 909 if (!a->mult_work) { 910 k = PetscMax(a->m,a->n); 911 a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work); 912 } 913 work = a->mult_work; 914 for ( i=0; i<mbs; i++ ) { 915 n = ii[1] - ii[0]; ii++; 916 ncols = n*bs; 917 workt = work; 918 for ( j=0; j<n; j++ ) { 919 xb = x + bs*(*idx++); 920 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 921 workt += bs; 922 } 923 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); 924 v += n*bs2; 925 z += bs; 926 } 927 VecRestoreArray_Fast(xx,x); 928 VecRestoreArray_Fast(zz,z); 929 PLogFlops(2*a->nz*bs2 - a->m); 930 return 0; 931 } 932 933 #undef __FUNC__ 934 #define __FUNC__ "MatMultAdd_SeqBAIJ_1" 935 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 936 { 937 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 938 register Scalar *x,*y,*z,*v,sum; 939 int mbs=a->mbs,i,*idx,*ii,n; 940 941 VecGetArray_Fast(xx,x); 942 VecGetArray_Fast(yy,y); 943 VecGetArray_Fast(zz,z); 944 945 idx = a->j; 946 v = a->a; 947 ii = a->i; 948 949 for ( i=0; i<mbs; i++ ) { 950 n = ii[1] - ii[0]; ii++; 951 sum = y[i]; 952 while (n--) sum += *v++ * x[*idx++]; 953 z[i] = sum; 954 } 955 VecRestoreArray_Fast(xx,x); 956 VecRestoreArray_Fast(yy,y); 957 VecRestoreArray_Fast(zz,z); 958 PLogFlops(2*a->nz); 959 return 0; 960 } 961 962 #undef __FUNC__ 963 #define __FUNC__ "MatMultAdd_SeqBAIJ_2" 964 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 965 { 966 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 967 register Scalar *x,*y,*z,*v,*xb,sum1,sum2; 968 register Scalar x1,x2; 969 int mbs=a->mbs,i,*idx,*ii; 970 int j,n; 971 972 VecGetArray_Fast(xx,x); 973 VecGetArray_Fast(yy,y); 974 VecGetArray_Fast(zz,z); 975 976 idx = a->j; 977 v = a->a; 978 ii = a->i; 979 980 for ( i=0; i<mbs; i++ ) { 981 n = ii[1] - ii[0]; ii++; 982 sum1 = y[0]; sum2 = y[1]; 983 for ( j=0; j<n; j++ ) { 984 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 985 sum1 += v[0]*x1 + v[2]*x2; 986 sum2 += v[1]*x1 + v[3]*x2; 987 v += 4; 988 } 989 z[0] = sum1; z[1] = sum2; 990 z += 2; y += 2; 991 } 992 VecRestoreArray_Fast(xx,x); 993 VecRestoreArray_Fast(yy,y); 994 VecRestoreArray_Fast(zz,z); 995 PLogFlops(4*a->nz); 996 return 0; 997 } 998 999 #undef __FUNC__ 1000 #define __FUNC__ "MatMultAdd_SeqBAIJ_3" 1001 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 1002 { 1003 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1004 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 1005 int mbs=a->mbs,i,*idx,*ii,j,n; 1006 1007 VecGetArray_Fast(xx,x); 1008 VecGetArray_Fast(yy,y); 1009 VecGetArray_Fast(zz,z); 1010 1011 idx = a->j; 1012 v = a->a; 1013 ii = a->i; 1014 1015 for ( i=0; i<mbs; i++ ) { 1016 n = ii[1] - ii[0]; ii++; 1017 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; 1018 for ( j=0; j<n; j++ ) { 1019 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1020 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 1021 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 1022 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 1023 v += 9; 1024 } 1025 z[0] = sum1; z[1] = sum2; z[2] = sum3; 1026 z += 3; y += 3; 1027 } 1028 VecRestoreArray_Fast(xx,x); 1029 VecRestoreArray_Fast(yy,y); 1030 VecRestoreArray_Fast(zz,z); 1031 PLogFlops(18*a->nz); 1032 return 0; 1033 } 1034 1035 #undef __FUNC__ 1036 #define __FUNC__ "MatMultAdd_SeqBAIJ_4" 1037 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 1038 { 1039 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1040 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4; 1041 register Scalar x1,x2,x3,x4; 1042 int mbs=a->mbs,i,*idx,*ii; 1043 int j,n; 1044 1045 VecGetArray_Fast(xx,x); 1046 VecGetArray_Fast(yy,y); 1047 VecGetArray_Fast(zz,z); 1048 1049 idx = a->j; 1050 v = a->a; 1051 ii = a->i; 1052 1053 for ( i=0; i<mbs; i++ ) { 1054 n = ii[1] - ii[0]; ii++; 1055 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; 1056 for ( j=0; j<n; j++ ) { 1057 xb = x + 4*(*idx++); 1058 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1059 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 1060 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 1061 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 1062 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 1063 v += 16; 1064 } 1065 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 1066 z += 4; y += 4; 1067 } 1068 VecRestoreArray_Fast(xx,x); 1069 VecRestoreArray_Fast(yy,y); 1070 VecRestoreArray_Fast(zz,z); 1071 PLogFlops(32*a->nz); 1072 return 0; 1073 } 1074 1075 #undef __FUNC__ 1076 #define __FUNC__ "MatMultAdd_SeqBAIJ_5" 1077 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 1078 { 1079 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1080 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 1081 register Scalar x1,x2,x3,x4,x5; 1082 int mbs=a->mbs,i,*idx,*ii,j,n; 1083 1084 VecGetArray_Fast(xx,x); 1085 VecGetArray_Fast(yy,y); 1086 VecGetArray_Fast(zz,z); 1087 1088 idx = a->j; 1089 v = a->a; 1090 ii = a->i; 1091 1092 for ( i=0; i<mbs; i++ ) { 1093 n = ii[1] - ii[0]; ii++; 1094 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; 1095 for ( j=0; j<n; j++ ) { 1096 xb = x + 5*(*idx++); 1097 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 1098 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1099 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1100 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1101 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1102 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 1103 v += 25; 1104 } 1105 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 1106 z += 5; y += 5; 1107 } 1108 VecRestoreArray_Fast(xx,x); 1109 VecRestoreArray_Fast(yy,y); 1110 VecRestoreArray_Fast(zz,z); 1111 PLogFlops(50*a->nz); 1112 return 0; 1113 } 1114 1115 #undef __FUNC__ 1116 #define __FUNC__ "MatMultAdd_SeqBAIJ_N" 1117 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 1118 { 1119 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1120 register Scalar *x,*z,*v,*xb; 1121 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr; 1122 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1123 1124 if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1125 1126 VecGetArray_Fast(xx,x); 1127 VecGetArray_Fast(zz,z); 1128 1129 idx = a->j; 1130 v = a->a; 1131 ii = a->i; 1132 1133 1134 if (!a->mult_work) { 1135 k = PetscMax(a->m,a->n); 1136 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work); 1137 } 1138 work = a->mult_work; 1139 for ( i=0; i<mbs; i++ ) { 1140 n = ii[1] - ii[0]; ii++; 1141 ncols = n*bs; 1142 workt = work; 1143 for ( j=0; j<n; j++ ) { 1144 xb = x + bs*(*idx++); 1145 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1146 workt += bs; 1147 } 1148 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); 1149 v += n*bs2; 1150 z += bs; 1151 } 1152 VecRestoreArray_Fast(xx,x); 1153 VecRestoreArray_Fast(zz,z); 1154 PLogFlops(2*a->nz*bs2 ); 1155 return 0; 1156 } 1157 1158 #undef __FUNC__ 1159 #define __FUNC__ "MatMultTrans_SeqBAIJ" 1160 static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz) 1161 { 1162 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1163 Scalar *xg,*zg,*zb; 1164 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1165 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1166 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1167 1168 1169 VecGetArray_Fast(xx,xg); x = xg; 1170 VecGetArray_Fast(zz,zg); z = zg; 1171 PetscMemzero(z,N*sizeof(Scalar)); 1172 1173 idx = a->j; 1174 v = a->a; 1175 ii = a->i; 1176 1177 switch (bs) { 1178 case 1: 1179 for ( i=0; i<mbs; i++ ) { 1180 n = ii[1] - ii[0]; ii++; 1181 xb = x + i; x1 = xb[0]; 1182 ib = idx + ai[i]; 1183 for ( j=0; j<n; j++ ) { 1184 rval = ib[j]; 1185 z[rval] += *v++ * x1; 1186 } 1187 } 1188 break; 1189 case 2: 1190 for ( i=0; i<mbs; i++ ) { 1191 n = ii[1] - ii[0]; ii++; 1192 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1193 ib = idx + ai[i]; 1194 for ( j=0; j<n; j++ ) { 1195 rval = ib[j]*2; 1196 z[rval++] += v[0]*x1 + v[1]*x2; 1197 z[rval++] += v[2]*x1 + v[3]*x2; 1198 v += 4; 1199 } 1200 } 1201 break; 1202 case 3: 1203 for ( i=0; i<mbs; i++ ) { 1204 n = ii[1] - ii[0]; ii++; 1205 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1206 ib = idx + ai[i]; 1207 for ( j=0; j<n; j++ ) { 1208 rval = ib[j]*3; 1209 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1210 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1211 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1212 v += 9; 1213 } 1214 } 1215 break; 1216 case 4: 1217 for ( i=0; i<mbs; i++ ) { 1218 n = ii[1] - ii[0]; ii++; 1219 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1220 ib = idx + ai[i]; 1221 for ( j=0; j<n; j++ ) { 1222 rval = ib[j]*4; 1223 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1224 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1225 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1226 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1227 v += 16; 1228 } 1229 } 1230 break; 1231 case 5: 1232 for ( i=0; i<mbs; i++ ) { 1233 n = ii[1] - ii[0]; ii++; 1234 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1235 x4 = xb[3]; x5 = xb[4]; 1236 ib = idx + ai[i]; 1237 for ( j=0; j<n; j++ ) { 1238 rval = ib[j]*5; 1239 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1240 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1241 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1242 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1243 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1244 v += 25; 1245 } 1246 } 1247 break; 1248 /* block sizes larger then 5 by 5 are handled by BLAS */ 1249 default: { 1250 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1251 if (!a->mult_work) { 1252 k = PetscMax(a->m,a->n); 1253 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1254 CHKPTRQ(a->mult_work); 1255 } 1256 work = a->mult_work; 1257 for ( i=0; i<mbs; i++ ) { 1258 n = ii[1] - ii[0]; ii++; 1259 ncols = n*bs; 1260 PetscMemzero(work,ncols*sizeof(Scalar)); 1261 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1262 v += n*bs2; 1263 x += bs; 1264 workt = work; 1265 for ( j=0; j<n; j++ ) { 1266 zb = z + bs*(*idx++); 1267 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1268 workt += bs; 1269 } 1270 } 1271 } 1272 } 1273 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1274 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1275 PLogFlops(2*a->nz*a->bs2 - a->n); 1276 return 0; 1277 } 1278 1279 #undef __FUNC__ 1280 #define __FUNC__ "MatMultTransAdd_SeqBAIJ" 1281 static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1282 { 1283 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1284 Scalar *xg,*zg,*zb; 1285 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1286 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1287 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1288 1289 1290 1291 VecGetArray_Fast(xx,xg); x = xg; 1292 VecGetArray_Fast(zz,zg); z = zg; 1293 1294 if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1295 else PetscMemzero(z,N*sizeof(Scalar)); 1296 1297 1298 idx = a->j; 1299 v = a->a; 1300 ii = a->i; 1301 1302 switch (bs) { 1303 case 1: 1304 for ( i=0; i<mbs; i++ ) { 1305 n = ii[1] - ii[0]; ii++; 1306 xb = x + i; x1 = xb[0]; 1307 ib = idx + ai[i]; 1308 for ( j=0; j<n; j++ ) { 1309 rval = ib[j]; 1310 z[rval] += *v++ * x1; 1311 } 1312 } 1313 break; 1314 case 2: 1315 for ( i=0; i<mbs; i++ ) { 1316 n = ii[1] - ii[0]; ii++; 1317 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1318 ib = idx + ai[i]; 1319 for ( j=0; j<n; j++ ) { 1320 rval = ib[j]*2; 1321 z[rval++] += v[0]*x1 + v[1]*x2; 1322 z[rval++] += v[2]*x1 + v[3]*x2; 1323 v += 4; 1324 } 1325 } 1326 break; 1327 case 3: 1328 for ( i=0; i<mbs; i++ ) { 1329 n = ii[1] - ii[0]; ii++; 1330 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1331 ib = idx + ai[i]; 1332 for ( j=0; j<n; j++ ) { 1333 rval = ib[j]*3; 1334 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1335 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1336 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1337 v += 9; 1338 } 1339 } 1340 break; 1341 case 4: 1342 for ( i=0; i<mbs; i++ ) { 1343 n = ii[1] - ii[0]; ii++; 1344 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1345 ib = idx + ai[i]; 1346 for ( j=0; j<n; j++ ) { 1347 rval = ib[j]*4; 1348 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1349 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1350 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1351 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1352 v += 16; 1353 } 1354 } 1355 break; 1356 case 5: 1357 for ( i=0; i<mbs; i++ ) { 1358 n = ii[1] - ii[0]; ii++; 1359 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1360 x4 = xb[3]; x5 = xb[4]; 1361 ib = idx + ai[i]; 1362 for ( j=0; j<n; j++ ) { 1363 rval = ib[j]*5; 1364 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1365 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1366 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1367 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1368 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1369 v += 25; 1370 } 1371 } 1372 break; 1373 /* block sizes larger then 5 by 5 are handled by BLAS */ 1374 default: { 1375 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1376 if (!a->mult_work) { 1377 k = PetscMax(a->m,a->n); 1378 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1379 CHKPTRQ(a->mult_work); 1380 } 1381 work = a->mult_work; 1382 for ( i=0; i<mbs; i++ ) { 1383 n = ii[1] - ii[0]; ii++; 1384 ncols = n*bs; 1385 PetscMemzero(work,ncols*sizeof(Scalar)); 1386 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1387 v += n*bs2; 1388 x += bs; 1389 workt = work; 1390 for ( j=0; j<n; j++ ) { 1391 zb = z + bs*(*idx++); 1392 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1393 workt += bs; 1394 } 1395 } 1396 } 1397 } 1398 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1399 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1400 PLogFlops(2*a->nz*a->bs2); 1401 return 0; 1402 } 1403 1404 #undef __FUNC__ 1405 #define __FUNC__ "MatGetInfo_SeqBAIJ" 1406 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info) 1407 { 1408 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1409 1410 info->rows_global = (double)a->m; 1411 info->columns_global = (double)a->n; 1412 info->rows_local = (double)a->m; 1413 info->columns_local = (double)a->n; 1414 info->block_size = a->bs2; 1415 info->nz_allocated = a->maxnz; 1416 info->nz_used = a->bs2*a->nz; 1417 info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 1418 /* if (info->nz_unneeded != A->info.nz_unneeded) 1419 printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */ 1420 info->assemblies = A->num_ass; 1421 info->mallocs = a->reallocs; 1422 info->memory = A->mem; 1423 if (A->factor) { 1424 info->fill_ratio_given = A->info.fill_ratio_given; 1425 info->fill_ratio_needed = A->info.fill_ratio_needed; 1426 info->factor_mallocs = A->info.factor_mallocs; 1427 } else { 1428 info->fill_ratio_given = 0; 1429 info->fill_ratio_needed = 0; 1430 info->factor_mallocs = 0; 1431 } 1432 return 0; 1433 } 1434 1435 #undef __FUNC__ 1436 #define __FUNC__ "MatEqual_SeqBAIJ" 1437 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 1438 { 1439 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 1440 1441 if (B->type !=MATSEQBAIJ)SETERRQ(1,0,"Matrices must be same type"); 1442 1443 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1444 if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 1445 (a->nz != b->nz)) { 1446 *flg = PETSC_FALSE; return 0; 1447 } 1448 1449 /* if the a->i are the same */ 1450 if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 1451 *flg = PETSC_FALSE; return 0; 1452 } 1453 1454 /* if a->j are the same */ 1455 if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 1456 *flg = PETSC_FALSE; return 0; 1457 } 1458 1459 /* if a->a are the same */ 1460 if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 1461 *flg = PETSC_FALSE; return 0; 1462 } 1463 *flg = PETSC_TRUE; 1464 return 0; 1465 1466 } 1467 1468 #undef __FUNC__ 1469 #define __FUNC__ "MatGetDiagonal_SeqBAIJ" 1470 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 1471 { 1472 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1473 int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 1474 Scalar *x, zero = 0.0,*aa,*aa_j; 1475 1476 if (A->factor) SETERRQ(1,0,"Not for factored matrix"); 1477 bs = a->bs; 1478 aa = a->a; 1479 ai = a->i; 1480 aj = a->j; 1481 ambs = a->mbs; 1482 bs2 = a->bs2; 1483 1484 VecSet(&zero,v); 1485 VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n); 1486 if (n != a->m) SETERRQ(1,0,"Nonconforming matrix and vector"); 1487 for ( i=0; i<ambs; i++ ) { 1488 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1489 if (aj[j] == i) { 1490 row = i*bs; 1491 aa_j = aa+j*bs2; 1492 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 1493 break; 1494 } 1495 } 1496 } 1497 return 0; 1498 } 1499 1500 #undef __FUNC__ 1501 #define __FUNC__ "MatDiagonalScale_SeqBAIJ" 1502 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 1503 { 1504 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1505 Scalar *l,*r,x,*v,*aa,*li,*ri; 1506 int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 1507 1508 ai = a->i; 1509 aj = a->j; 1510 aa = a->a; 1511 m = a->m; 1512 n = a->n; 1513 bs = a->bs; 1514 mbs = a->mbs; 1515 bs2 = a->bs2; 1516 if (ll) { 1517 VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm); 1518 if (lm != m) SETERRQ(1,0,"Left scaling vector wrong length"); 1519 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1520 M = ai[i+1] - ai[i]; 1521 li = l + i*bs; 1522 v = aa + bs2*ai[i]; 1523 for ( j=0; j<M; j++ ) { /* for each block */ 1524 for ( k=0; k<bs2; k++ ) { 1525 (*v++) *= li[k%bs]; 1526 } 1527 } 1528 } 1529 } 1530 1531 if (rr) { 1532 VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn); 1533 if (rn != n) SETERRQ(1,0,"Right scaling vector wrong length"); 1534 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1535 M = ai[i+1] - ai[i]; 1536 v = aa + bs2*ai[i]; 1537 for ( j=0; j<M; j++ ) { /* for each block */ 1538 ri = r + bs*aj[ai[i]+j]; 1539 for ( k=0; k<bs; k++ ) { 1540 x = ri[k]; 1541 for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 1542 } 1543 } 1544 } 1545 } 1546 return 0; 1547 } 1548 1549 1550 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 1551 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 1552 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1553 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*); 1554 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**); 1555 1556 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 1557 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 1558 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 1559 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 1560 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 1561 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 1562 1563 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 1564 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 1565 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 1566 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 1567 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 1568 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 1569 1570 #undef __FUNC__ 1571 #define __FUNC__ "MatNorm_SeqBAIJ" 1572 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 1573 { 1574 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1575 Scalar *v = a->a; 1576 double sum = 0.0; 1577 int i,nz=a->nz,bs2=a->bs2; 1578 1579 if (type == NORM_FROBENIUS) { 1580 for (i=0; i< bs2*nz; i++ ) { 1581 #if defined(PETSC_COMPLEX) 1582 sum += real(conj(*v)*(*v)); v++; 1583 #else 1584 sum += (*v)*(*v); v++; 1585 #endif 1586 } 1587 *norm = sqrt(sum); 1588 } 1589 else { 1590 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 1591 } 1592 return 0; 1593 } 1594 1595 /* 1596 note: This can only work for identity for row and col. It would 1597 be good to check this and otherwise generate an error. 1598 */ 1599 #undef __FUNC__ 1600 #define __FUNC__ "MatILUFactor_SeqBAIJ" 1601 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 1602 { 1603 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1604 Mat outA; 1605 int ierr; 1606 1607 if (fill != 0) SETERRQ(1,0,"Only fill=0 supported"); 1608 1609 outA = inA; 1610 inA->factor = FACTOR_LU; 1611 a->row = row; 1612 a->col = col; 1613 1614 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1615 1616 if (!a->diag) { 1617 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 1618 } 1619 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 1620 return 0; 1621 } 1622 1623 #undef __FUNC__ 1624 #define __FUNC__ "MatScale_SeqBAIJ" 1625 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 1626 { 1627 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1628 int one = 1, totalnz = a->bs2*a->nz; 1629 BLscal_( &totalnz, alpha, a->a, &one ); 1630 PLogFlops(totalnz); 1631 return 0; 1632 } 1633 1634 #undef __FUNC__ 1635 #define __FUNC__ "MatGetValues_SeqBAIJ" 1636 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 1637 { 1638 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1639 int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 1640 int *ai = a->i, *ailen = a->ilen; 1641 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 1642 Scalar *ap, *aa = a->a, zero = 0.0; 1643 1644 for ( k=0; k<m; k++ ) { /* loop over rows */ 1645 row = im[k]; brow = row/bs; 1646 if (row < 0) SETERRQ(1,0,"Negative row"); 1647 if (row >= a->m) SETERRQ(1,0,"Row too large"); 1648 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1649 nrow = ailen[brow]; 1650 for ( l=0; l<n; l++ ) { /* loop over columns */ 1651 if (in[l] < 0) SETERRQ(1,0,"Negative column"); 1652 if (in[l] >= a->n) SETERRQ(1,0,"Column too large"); 1653 col = in[l] ; 1654 bcol = col/bs; 1655 cidx = col%bs; 1656 ridx = row%bs; 1657 high = nrow; 1658 low = 0; /* assume unsorted */ 1659 while (high-low > 5) { 1660 t = (low+high)/2; 1661 if (rp[t] > bcol) high = t; 1662 else low = t; 1663 } 1664 for ( i=low; i<high; i++ ) { 1665 if (rp[i] > bcol) break; 1666 if (rp[i] == bcol) { 1667 *v++ = ap[bs2*i+bs*cidx+ridx]; 1668 goto finished; 1669 } 1670 } 1671 *v++ = zero; 1672 finished:; 1673 } 1674 } 1675 return 0; 1676 } 1677 1678 #undef __FUNC__ 1679 #define __FUNC__ "MatGetBlockSize_SeqBAIJ" 1680 static int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs) 1681 { 1682 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data; 1683 *bs = baij->bs; 1684 return 0; 1685 } 1686 1687 /* idx should be of length atlease bs */ 1688 #undef __FUNC__ 1689 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Block" 1690 static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg) 1691 { 1692 int i,row; 1693 row = idx[0]; 1694 if (row%bs!=0) { *flg = PETSC_FALSE; return 0; } 1695 1696 for ( i=1; i<bs; i++ ) { 1697 if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; } 1698 } 1699 *flg = PETSC_TRUE; 1700 return 0; 1701 } 1702 1703 #undef __FUNC__ 1704 #define __FUNC__ "MatZeroRows_SeqBAIJ" 1705 static int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag) 1706 { 1707 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1708 IS is_local; 1709 int ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2; 1710 PetscTruth flg; 1711 Scalar zero = 0.0,*aa; 1712 1713 /* Make a copy of the IS and sort it */ 1714 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 1715 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1716 ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr); 1717 ierr = ISSort(is_local); CHKERRQ(ierr); 1718 ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr); 1719 1720 i = 0; 1721 while (i < is_n) { 1722 if (rows[i]<0 || rows[i]>m) SETERRQ(1,0,"row out of range"); 1723 flg = PETSC_FALSE; 1724 if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); } 1725 if (flg) { /* There exists a block of rows to be Zerowed */ 1726 baij->ilen[rows[i]/bs] = 0; 1727 i += bs; 1728 } else { /* Zero out only the requested row */ 1729 count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs; 1730 aa = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs); 1731 for ( j=0; j<count; j++ ) { 1732 aa[0] = zero; 1733 aa+=bs; 1734 } 1735 i++; 1736 } 1737 } 1738 if (diag) { 1739 for ( j=0; j<is_n; j++ ) { 1740 ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1741 } 1742 } 1743 ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr); 1744 ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr); 1745 ierr = ISDestroy(is_local); CHKERRQ(ierr); 1746 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1747 1748 return 0; 1749 } 1750 1751 #undef __FUNC__ 1752 #define __FUNC__ "MatPrintHelp_SeqBAIJ" 1753 int MatPrintHelp_SeqBAIJ(Mat A) 1754 { 1755 static int called = 0; 1756 MPI_Comm comm = A->comm; 1757 1758 if (called) return 0; else called = 1; 1759 PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n"); 1760 PetscPrintf(comm," -mat_block_size <block_size>\n"); 1761 return 0; 1762 } 1763 1764 /* -------------------------------------------------------------------*/ 1765 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 1766 MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 1767 MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N, 1768 MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ, 1769 MatSolve_SeqBAIJ_N,0, 1770 0,0, 1771 MatLUFactor_SeqBAIJ,0, 1772 0, 1773 MatTranspose_SeqBAIJ, 1774 MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 1775 MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 1776 0,MatAssemblyEnd_SeqBAIJ, 1777 0, 1778 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ, 1779 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 1780 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 1781 MatILUFactorSymbolic_SeqBAIJ,0, 1782 0,0,/* MatConvert_SeqBAIJ */ 0, 1783 MatConvertSameType_SeqBAIJ,0,0, 1784 MatILUFactor_SeqBAIJ,0,0, 1785 MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ, 1786 MatGetValues_SeqBAIJ,0, 1787 MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 1788 0,0,0,MatGetBlockSize_SeqBAIJ, 1789 MatGetRowIJ_SeqBAIJ, 1790 MatRestoreRowIJ_SeqBAIJ}; 1791 1792 #undef __FUNC__ 1793 #define __FUNC__ "MatCreateSeqBAIJ" 1794 /*@C 1795 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1796 compressed row) format. For good matrix assembly performance the 1797 user should preallocate the matrix storage by setting the parameter nz 1798 (or the array nzz). By setting these parameters accurately, performance 1799 during matrix assembly can be increased by more than a factor of 50. 1800 1801 Input Parameters: 1802 . comm - MPI communicator, set to MPI_COMM_SELF 1803 . bs - size of block 1804 . m - number of rows 1805 . n - number of columns 1806 . nz - number of block nonzeros per block row (same for all rows) 1807 . nzz - array containing the number of block nonzeros in the various block rows 1808 (possibly different for each block row) or PETSC_NULL 1809 1810 Output Parameter: 1811 . A - the matrix 1812 1813 Options Database Keys: 1814 $ -mat_no_unroll - uses code that does not unroll the loops in the 1815 $ block calculations (much solver) 1816 $ -mat_block_size - size of the blocks to use 1817 1818 Notes: 1819 The block AIJ format is fully compatible with standard Fortran 77 1820 storage. That is, the stored row and column indices can begin at 1821 either one (as in Fortran) or zero. See the users' manual for details. 1822 1823 Specify the preallocated storage with either nz or nnz (not both). 1824 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1825 allocation. For additional details, see the users manual chapter on 1826 matrices. 1827 1828 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1829 @*/ 1830 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 1831 { 1832 Mat B; 1833 Mat_SeqBAIJ *b; 1834 int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size; 1835 1836 MPI_Comm_size(comm,&size); 1837 if (size > 1) SETERRQ(1,0,"Comm must be of size 1"); 1838 1839 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr); 1840 1841 if (mbs*bs!=m || nbs*bs!=n) 1842 SETERRQ(1,0,"Number rows, cols must be divisible by blocksize"); 1843 1844 *A = 0; 1845 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 1846 PLogObjectCreate(B); 1847 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 1848 PetscMemzero(b,sizeof(Mat_SeqBAIJ)); 1849 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 1850 ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr); 1851 if (!flg) { 1852 switch (bs) { 1853 case 1: 1854 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1855 B->ops.solve = MatSolve_SeqBAIJ_1; 1856 B->ops.mult = MatMult_SeqBAIJ_1; 1857 B->ops.multadd = MatMultAdd_SeqBAIJ_1; 1858 break; 1859 case 2: 1860 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1861 B->ops.solve = MatSolve_SeqBAIJ_2; 1862 B->ops.mult = MatMult_SeqBAIJ_2; 1863 B->ops.multadd = MatMultAdd_SeqBAIJ_2; 1864 break; 1865 case 3: 1866 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1867 B->ops.solve = MatSolve_SeqBAIJ_3; 1868 B->ops.mult = MatMult_SeqBAIJ_3; 1869 B->ops.multadd = MatMultAdd_SeqBAIJ_3; 1870 break; 1871 case 4: 1872 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1873 B->ops.solve = MatSolve_SeqBAIJ_4; 1874 B->ops.mult = MatMult_SeqBAIJ_4; 1875 B->ops.multadd = MatMultAdd_SeqBAIJ_4; 1876 break; 1877 case 5: 1878 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1879 B->ops.solve = MatSolve_SeqBAIJ_5; 1880 B->ops.mult = MatMult_SeqBAIJ_5; 1881 B->ops.multadd = MatMultAdd_SeqBAIJ_5; 1882 break; 1883 } 1884 } 1885 B->destroy = MatDestroy_SeqBAIJ; 1886 B->view = MatView_SeqBAIJ; 1887 B->factor = 0; 1888 B->lupivotthreshold = 1.0; 1889 B->mapping = 0; 1890 b->row = 0; 1891 b->col = 0; 1892 b->reallocs = 0; 1893 1894 b->m = m; B->m = m; B->M = m; 1895 b->n = n; B->n = n; B->N = n; 1896 b->mbs = mbs; 1897 b->nbs = nbs; 1898 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 1899 if (nnz == PETSC_NULL) { 1900 if (nz == PETSC_DEFAULT) nz = 5; 1901 else if (nz <= 0) nz = 1; 1902 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 1903 nz = nz*mbs; 1904 } 1905 else { 1906 nz = 0; 1907 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 1908 } 1909 1910 /* allocate the matrix space */ 1911 len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 1912 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 1913 PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 1914 b->j = (int *) (b->a + nz*bs2); 1915 PetscMemzero(b->j,nz*sizeof(int)); 1916 b->i = b->j + nz; 1917 b->singlemalloc = 1; 1918 1919 b->i[0] = 0; 1920 for (i=1; i<mbs+1; i++) { 1921 b->i[i] = b->i[i-1] + b->imax[i-1]; 1922 } 1923 1924 /* b->ilen will count nonzeros in each block row so far. */ 1925 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 1926 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 1927 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 1928 1929 b->bs = bs; 1930 b->bs2 = bs2; 1931 b->mbs = mbs; 1932 b->nz = 0; 1933 b->maxnz = nz*bs2; 1934 b->sorted = 0; 1935 b->roworiented = 1; 1936 b->nonew = 0; 1937 b->diag = 0; 1938 b->solve_work = 0; 1939 b->mult_work = 0; 1940 b->spptr = 0; 1941 B->info.nz_unneeded = (double)b->maxnz; 1942 1943 *A = B; 1944 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 1945 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 1946 return 0; 1947 } 1948 1949 #undef __FUNC__ 1950 #define __FUNC__ "MatConvertSameType_SeqBAIJ" 1951 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 1952 { 1953 Mat C; 1954 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 1955 int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1956 1957 if (a->i[mbs] != nz) SETERRQ(1,0,"Corrupt matrix"); 1958 1959 *B = 0; 1960 PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 1961 PLogObjectCreate(C); 1962 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 1963 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 1964 C->destroy = MatDestroy_SeqBAIJ; 1965 C->view = MatView_SeqBAIJ; 1966 C->factor = A->factor; 1967 c->row = 0; 1968 c->col = 0; 1969 C->assembled = PETSC_TRUE; 1970 1971 c->m = C->m = a->m; 1972 c->n = C->n = a->n; 1973 C->M = a->m; 1974 C->N = a->n; 1975 1976 c->bs = a->bs; 1977 c->bs2 = a->bs2; 1978 c->mbs = a->mbs; 1979 c->nbs = a->nbs; 1980 1981 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 1982 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 1983 for ( i=0; i<mbs; i++ ) { 1984 c->imax[i] = a->imax[i]; 1985 c->ilen[i] = a->ilen[i]; 1986 } 1987 1988 /* allocate the matrix space */ 1989 c->singlemalloc = 1; 1990 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 1991 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 1992 c->j = (int *) (c->a + nz*bs2); 1993 c->i = c->j + nz; 1994 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 1995 if (mbs > 0) { 1996 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 1997 if (cpvalues == COPY_VALUES) { 1998 PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 1999 } 2000 } 2001 2002 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 2003 c->sorted = a->sorted; 2004 c->roworiented = a->roworiented; 2005 c->nonew = a->nonew; 2006 2007 if (a->diag) { 2008 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 2009 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 2010 for ( i=0; i<mbs; i++ ) { 2011 c->diag[i] = a->diag[i]; 2012 } 2013 } 2014 else c->diag = 0; 2015 c->nz = a->nz; 2016 c->maxnz = a->maxnz; 2017 c->solve_work = 0; 2018 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 2019 c->mult_work = 0; 2020 *B = C; 2021 return 0; 2022 } 2023 2024 #undef __FUNC__ 2025 #define __FUNC__ "MatLoad_SeqBAIJ" 2026 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 2027 { 2028 Mat_SeqBAIJ *a; 2029 Mat B; 2030 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 2031 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2032 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 2033 int *masked, nmask,tmp,bs2,ishift; 2034 Scalar *aa; 2035 MPI_Comm comm = ((PetscObject) viewer)->comm; 2036 2037 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2038 bs2 = bs*bs; 2039 2040 MPI_Comm_size(comm,&size); 2041 if (size > 1) SETERRQ(1,0,"view must have one processor"); 2042 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2043 ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 2044 if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not Mat object"); 2045 M = header[1]; N = header[2]; nz = header[3]; 2046 2047 if (M != N) SETERRQ(1,0,"Can only do square matrices"); 2048 2049 /* 2050 This code adds extra rows to make sure the number of rows is 2051 divisible by the blocksize 2052 */ 2053 mbs = M/bs; 2054 extra_rows = bs - M + bs*(mbs); 2055 if (extra_rows == bs) extra_rows = 0; 2056 else mbs++; 2057 if (extra_rows) { 2058 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 2059 } 2060 2061 /* read in row lengths */ 2062 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 2063 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 2064 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 2065 2066 /* read in column indices */ 2067 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 2068 ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 2069 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 2070 2071 /* loop over row lengths determining block row lengths */ 2072 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 2073 PetscMemzero(browlengths,mbs*sizeof(int)); 2074 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 2075 PetscMemzero(mask,mbs*sizeof(int)); 2076 masked = mask + mbs; 2077 rowcount = 0; nzcount = 0; 2078 for ( i=0; i<mbs; i++ ) { 2079 nmask = 0; 2080 for ( j=0; j<bs; j++ ) { 2081 kmax = rowlengths[rowcount]; 2082 for ( k=0; k<kmax; k++ ) { 2083 tmp = jj[nzcount++]/bs; 2084 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2085 } 2086 rowcount++; 2087 } 2088 browlengths[i] += nmask; 2089 /* zero out the mask elements we set */ 2090 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2091 } 2092 2093 /* create our matrix */ 2094 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 2095 CHKERRQ(ierr); 2096 B = *A; 2097 a = (Mat_SeqBAIJ *) B->data; 2098 2099 /* set matrix "i" values */ 2100 a->i[0] = 0; 2101 for ( i=1; i<= mbs; i++ ) { 2102 a->i[i] = a->i[i-1] + browlengths[i-1]; 2103 a->ilen[i-1] = browlengths[i-1]; 2104 } 2105 a->nz = 0; 2106 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 2107 2108 /* read in nonzero values */ 2109 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 2110 ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 2111 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 2112 2113 /* set "a" and "j" values into matrix */ 2114 nzcount = 0; jcount = 0; 2115 for ( i=0; i<mbs; i++ ) { 2116 nzcountb = nzcount; 2117 nmask = 0; 2118 for ( j=0; j<bs; j++ ) { 2119 kmax = rowlengths[i*bs+j]; 2120 for ( k=0; k<kmax; k++ ) { 2121 tmp = jj[nzcount++]/bs; 2122 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2123 } 2124 rowcount++; 2125 } 2126 /* sort the masked values */ 2127 PetscSortInt(nmask,masked); 2128 2129 /* set "j" values into matrix */ 2130 maskcount = 1; 2131 for ( j=0; j<nmask; j++ ) { 2132 a->j[jcount++] = masked[j]; 2133 mask[masked[j]] = maskcount++; 2134 } 2135 /* set "a" values into matrix */ 2136 ishift = bs2*a->i[i]; 2137 for ( j=0; j<bs; j++ ) { 2138 kmax = rowlengths[i*bs+j]; 2139 for ( k=0; k<kmax; k++ ) { 2140 tmp = jj[nzcountb]/bs ; 2141 block = mask[tmp] - 1; 2142 point = jj[nzcountb] - bs*tmp; 2143 idx = ishift + bs2*block + j + bs*point; 2144 a->a[idx] = aa[nzcountb++]; 2145 } 2146 } 2147 /* zero out the mask elements we set */ 2148 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2149 } 2150 if (jcount != a->nz) SETERRQ(1,0,"Error bad binary matrix"); 2151 2152 PetscFree(rowlengths); 2153 PetscFree(browlengths); 2154 PetscFree(aa); 2155 PetscFree(jj); 2156 PetscFree(mask); 2157 2158 B->assembled = PETSC_TRUE; 2159 2160 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 2161 if (flg) { 2162 Viewer tviewer; 2163 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2164 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO,0);CHKERRQ(ierr); 2165 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2166 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2167 } 2168 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 2169 if (flg) { 2170 Viewer tviewer; 2171 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2172 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO_LONG,0);CHKERRQ(ierr); 2173 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2174 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2175 } 2176 ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 2177 if (flg) { 2178 Viewer tviewer; 2179 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2180 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2181 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2182 } 2183 ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 2184 if (flg) { 2185 Viewer tviewer; 2186 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2187 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_MATLAB,"M");CHKERRQ(ierr); 2188 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2189 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2190 } 2191 ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 2192 if (flg) { 2193 Viewer tviewer; 2194 ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 2195 ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 2196 ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 2197 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2198 } 2199 return 0; 2200 } 2201 2202 2203 2204