1 #ifndef lint 2 static char vcid[] = "$Id: baij.c,v 1.86 1997/01/29 19:45:09 balay Exp bsmith $"; 3 #endif 4 5 /* 6 Defines the basic matrix operations for the BAIJ (compressed row) 7 matrix storage format. 8 */ 9 #include "src/mat/impls/baij/seq/baij.h" 10 #include "src/vec/vecimpl.h" 11 #include "src/inline/spops.h" 12 #include "petsc.h" 13 14 15 /* 16 Adds diagonal pointers to sparse matrix structure. 17 */ 18 19 #undef __FUNC__ 20 #define __FUNC__ "MatMarkDiag_SeqBAIJ" 21 int MatMarkDiag_SeqBAIJ(Mat A) 22 { 23 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 24 int i,j, *diag, m = a->mbs; 25 26 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 27 PLogObjectMemory(A,(m+1)*sizeof(int)); 28 for ( i=0; i<m; i++ ) { 29 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 30 if (a->j[j] == i) { 31 diag[i] = j; 32 break; 33 } 34 } 35 } 36 a->diag = diag; 37 return 0; 38 } 39 40 #include "draw.h" 41 #include "pinclude/pviewer.h" 42 #include "sys.h" 43 44 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 45 46 #undef __FUNC__ 47 #define __FUNC__ "MatGetRowIJ_SeqBAIJ" 48 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 49 PetscTruth *done) 50 { 51 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 52 int ierr, n = a->mbs,i; 53 54 *nn = n; 55 if (!ia) return 0; 56 if (symmetric) { 57 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 58 } else if (oshift == 1) { 59 /* temporarily add 1 to i and j indices */ 60 int nz = a->i[n] + 1; 61 for ( i=0; i<nz; i++ ) a->j[i]++; 62 for ( i=0; i<n+1; i++ ) a->i[i]++; 63 *ia = a->i; *ja = a->j; 64 } else { 65 *ia = a->i; *ja = a->j; 66 } 67 68 return 0; 69 } 70 71 #undef __FUNC__ 72 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ" 73 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 74 PetscTruth *done) 75 { 76 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 77 int i,n = a->mbs; 78 79 if (!ia) return 0; 80 if (symmetric) { 81 PetscFree(*ia); 82 PetscFree(*ja); 83 } else if (oshift == 1) { 84 int nz = a->i[n]; 85 for ( i=0; i<nz; i++ ) a->j[i]--; 86 for ( i=0; i<n+1; i++ ) a->i[i]--; 87 } 88 return 0; 89 } 90 91 92 #undef __FUNC__ 93 #define __FUNC__ "MatView_SeqBAIJ_Binary" 94 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 95 { 96 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 97 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 98 Scalar *aa; 99 100 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 101 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 102 col_lens[0] = MAT_COOKIE; 103 104 col_lens[1] = a->m; 105 col_lens[2] = a->n; 106 col_lens[3] = a->nz*bs2; 107 108 /* store lengths of each row and write (including header) to file */ 109 count = 0; 110 for ( i=0; i<a->mbs; i++ ) { 111 for ( j=0; j<bs; j++ ) { 112 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 113 } 114 } 115 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 116 PetscFree(col_lens); 117 118 /* store column indices (zero start index) */ 119 jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj); 120 count = 0; 121 for ( i=0; i<a->mbs; i++ ) { 122 for ( j=0; j<bs; j++ ) { 123 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 124 for ( l=0; l<bs; l++ ) { 125 jj[count++] = bs*a->j[k] + l; 126 } 127 } 128 } 129 } 130 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr); 131 PetscFree(jj); 132 133 /* store nonzero values */ 134 aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa); 135 count = 0; 136 for ( i=0; i<a->mbs; i++ ) { 137 for ( j=0; j<bs; j++ ) { 138 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 139 for ( l=0; l<bs; l++ ) { 140 aa[count++] = a->a[bs2*k + l*bs + j]; 141 } 142 } 143 } 144 } 145 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 146 PetscFree(aa); 147 return 0; 148 } 149 150 #undef __FUNC__ 151 #define __FUNC__ "MatView_SeqBAIJ_ASCII" 152 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 153 { 154 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 155 int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 156 FILE *fd; 157 char *outputname; 158 159 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 160 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 161 ierr = ViewerGetFormat(viewer,&format); 162 if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) { 163 fprintf(fd," block size is %d\n",bs); 164 } 165 else if (format == VIEWER_FORMAT_ASCII_MATLAB) { 166 SETERRQ(1,0,"Matlab format not supported"); 167 } 168 else if (format == VIEWER_FORMAT_ASCII_COMMON) { 169 for ( i=0; i<a->mbs; i++ ) { 170 for ( j=0; j<bs; j++ ) { 171 fprintf(fd,"row %d:",i*bs+j); 172 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 173 for ( l=0; l<bs; l++ ) { 174 #if defined(PETSC_COMPLEX) 175 if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 176 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 177 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 178 else if (real(a->a[bs2*k + l*bs + j]) != 0.0) 179 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 180 #else 181 if (a->a[bs2*k + l*bs + j] != 0.0) 182 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 183 #endif 184 } 185 } 186 fprintf(fd,"\n"); 187 } 188 } 189 } 190 else { 191 for ( i=0; i<a->mbs; i++ ) { 192 for ( j=0; j<bs; j++ ) { 193 fprintf(fd,"row %d:",i*bs+j); 194 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 195 for ( l=0; l<bs; l++ ) { 196 #if defined(PETSC_COMPLEX) 197 if (imag(a->a[bs2*k + l*bs + j]) != 0.0) { 198 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 199 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 200 } 201 else { 202 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 203 } 204 #else 205 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 206 #endif 207 } 208 } 209 fprintf(fd,"\n"); 210 } 211 } 212 } 213 fflush(fd); 214 return 0; 215 } 216 217 #undef __FUNC__ 218 #define __FUNC__ "MatView_SeqBAIJ_Draw" 219 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer) 220 { 221 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 222 int row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2; 223 double xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r; 224 Scalar *aa; 225 Draw draw; 226 DrawButton button; 227 PetscTruth isnull; 228 229 ViewerDrawGetDraw(viewer,&draw); 230 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 231 232 xr = a->n; yr = a->m; h = yr/10.0; w = xr/10.0; 233 xr += w; yr += h; xl = -w; yl = -h; 234 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 235 /* loop over matrix elements drawing boxes */ 236 color = DRAW_BLUE; 237 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 238 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 239 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 240 x_l = a->j[j]*bs; x_r = x_l + 1.0; 241 aa = a->a + j*bs2; 242 for ( k=0; k<bs; k++ ) { 243 for ( l=0; l<bs; l++ ) { 244 if (PetscReal(*aa++) >= 0.) continue; 245 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 246 } 247 } 248 } 249 } 250 color = DRAW_CYAN; 251 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 252 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 253 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 254 x_l = a->j[j]*bs; x_r = x_l + 1.0; 255 aa = a->a + j*bs2; 256 for ( k=0; k<bs; k++ ) { 257 for ( l=0; l<bs; l++ ) { 258 if (PetscReal(*aa++) != 0.) continue; 259 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 260 } 261 } 262 } 263 } 264 265 color = DRAW_RED; 266 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 267 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 268 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 269 x_l = a->j[j]*bs; x_r = x_l + 1.0; 270 aa = a->a + j*bs2; 271 for ( k=0; k<bs; k++ ) { 272 for ( l=0; l<bs; l++ ) { 273 if (PetscReal(*aa++) <= 0.) continue; 274 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 275 } 276 } 277 } 278 } 279 280 DrawFlush(draw); 281 DrawGetPause(draw,&pause); 282 if (pause >= 0) { PetscSleep(pause); return 0;} 283 284 /* allow the matrix to zoom or shrink */ 285 ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 286 while (button != BUTTON_RIGHT) { 287 DrawClear(draw); 288 if (button == BUTTON_LEFT) scale = .5; 289 else if (button == BUTTON_CENTER) scale = 2.; 290 xl = scale*(xl + w - xc) + xc - w*scale; 291 xr = scale*(xr - w - xc) + xc + w*scale; 292 yl = scale*(yl + h - yc) + yc - h*scale; 293 yr = scale*(yr - h - yc) + yc + h*scale; 294 w *= scale; h *= scale; 295 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 296 color = DRAW_BLUE; 297 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 298 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 299 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 300 x_l = a->j[j]*bs; x_r = x_l + 1.0; 301 aa = a->a + j*bs2; 302 for ( k=0; k<bs; k++ ) { 303 for ( l=0; l<bs; l++ ) { 304 if (PetscReal(*aa++) >= 0.) continue; 305 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 306 } 307 } 308 } 309 } 310 color = DRAW_CYAN; 311 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 312 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 313 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 314 x_l = a->j[j]*bs; x_r = x_l + 1.0; 315 aa = a->a + j*bs2; 316 for ( k=0; k<bs; k++ ) { 317 for ( l=0; l<bs; l++ ) { 318 if (PetscReal(*aa++) != 0.) continue; 319 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 320 } 321 } 322 } 323 } 324 325 color = DRAW_RED; 326 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 327 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 328 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 329 x_l = a->j[j]*bs; x_r = x_l + 1.0; 330 aa = a->a + j*bs2; 331 for ( k=0; k<bs; k++ ) { 332 for ( l=0; l<bs; l++ ) { 333 if (PetscReal(*aa++) <= 0.) continue; 334 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 335 } 336 } 337 } 338 } 339 ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 340 } 341 return 0; 342 } 343 344 #undef __FUNC__ 345 #define __FUNC__ "MatView_SeqBAIJ" 346 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 347 { 348 Mat A = (Mat) obj; 349 ViewerType vtype; 350 int ierr; 351 352 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 353 if (vtype == MATLAB_VIEWER) { 354 SETERRQ(1,0,"Matlab viewer not supported"); 355 } 356 else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 357 return MatView_SeqBAIJ_ASCII(A,viewer); 358 } 359 else if (vtype == BINARY_FILE_VIEWER) { 360 return MatView_SeqBAIJ_Binary(A,viewer); 361 } 362 else if (vtype == DRAW_VIEWER) { 363 return MatView_SeqBAIJ_Draw(A,viewer); 364 } 365 return 0; 366 } 367 368 #define CHUNKSIZE 10 369 370 #undef __FUNC__ 371 #define __FUNC__ "MatSetValues_SeqBAIJ" 372 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 373 { 374 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 375 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 376 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 377 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 378 int ridx,cidx,bs2=a->bs2; 379 Scalar *ap,value,*aa=a->a,*bap; 380 381 for ( k=0; k<m; k++ ) { /* loop over added rows */ 382 row = im[k]; brow = row/bs; 383 #if defined(PETSC_BOPT_g) 384 if (row < 0) SETERRQ(1,0,"Negative row"); 385 if (row >= a->m) SETERRQ(1,0,"Row too large"); 386 #endif 387 rp = aj + ai[brow]; 388 ap = aa + bs2*ai[brow]; 389 rmax = imax[brow]; 390 nrow = ailen[brow]; 391 low = 0; 392 for ( l=0; l<n; l++ ) { /* loop over added columns */ 393 #if defined(PETSC_BOPT_g) 394 if (in[l] < 0) SETERRQ(1,0,"Negative column"); 395 if (in[l] >= a->n) SETERRQ(1,0,"Column too large"); 396 #endif 397 col = in[l]; bcol = col/bs; 398 ridx = row % bs; cidx = col % bs; 399 if (roworiented) { 400 value = *v++; 401 } 402 else { 403 value = v[k + l*m]; 404 } 405 if (!sorted) low = 0; high = nrow; 406 while (high-low > 7) { 407 t = (low+high)/2; 408 if (rp[t] > bcol) high = t; 409 else low = t; 410 } 411 for ( i=low; i<high; i++ ) { 412 if (rp[i] > bcol) break; 413 if (rp[i] == bcol) { 414 bap = ap + bs2*i + bs*cidx + ridx; 415 if (is == ADD_VALUES) *bap += value; 416 else *bap = value; 417 goto noinsert; 418 } 419 } 420 if (nonew) goto noinsert; 421 if (nrow >= rmax) { 422 /* there is no extra room in row, therefore enlarge */ 423 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 424 Scalar *new_a; 425 426 /* malloc new storage space */ 427 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 428 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 429 new_j = (int *) (new_a + bs2*new_nz); 430 new_i = new_j + new_nz; 431 432 /* copy over old data into new slots */ 433 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 434 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 435 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); 436 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 437 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, 438 len*sizeof(int)); 439 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); 440 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 441 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), 442 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); 443 /* free up old matrix storage */ 444 PetscFree(a->a); 445 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 446 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 447 a->singlemalloc = 1; 448 449 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 450 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 451 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 452 a->maxnz += bs2*CHUNKSIZE; 453 a->reallocs++; 454 a->nz++; 455 } 456 N = nrow++ - 1; 457 /* shift up all the later entries in this row */ 458 for ( ii=N; ii>=i; ii-- ) { 459 rp[ii+1] = rp[ii]; 460 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 461 } 462 if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 463 rp[i] = bcol; 464 ap[bs2*i + bs*cidx + ridx] = value; 465 noinsert:; 466 low = i; 467 } 468 ailen[brow] = nrow; 469 } 470 return 0; 471 } 472 473 #undef __FUNC__ 474 #define __FUNC__ "MatSetValues_SeqBAIJ" 475 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 476 { 477 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 478 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 479 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 480 int *aj=a->j,nonew=a->nonew,bs=a->bs; 481 int ridx,cidx,bs2=a->bs2; 482 Scalar *ap,value,*aa=a->a,*bap; 483 484 for ( k=0; k<m; k++ ) { /* loop over added rows */ 485 row = im[k]; 486 #if defined(PETSC_BOPT_g) 487 if (row < 0) SETERRQ(1,0,"Negative row"); 488 if (row >= a->m) SETERRQ(1,0,"Row too large"); 489 #endif 490 rp = aj + ai[row]; 491 ap = aa + bs2*ai[row]; 492 rmax = imax[row]; 493 nrow = ailen[row]; 494 low = 0; 495 for ( l=0; l<n; l++ ) { /* loop over added columns */ 496 #if defined(PETSC_BOPT_g) 497 if (in[l] < 0) SETERRQ(1,0,"Negative column"); 498 if (in[l] >= a->n) SETERRQ(1,0,"Column too large"); 499 #endif 500 col = in[l]; 501 ridx = row % bs; cidx = col % bs; 502 if (roworiented) { 503 value = *v++; 504 } 505 else { 506 value = v[k + l*m]; 507 } 508 if (!sorted) low = 0; high = nrow; 509 while (high-low > 7) { 510 t = (low+high)/2; 511 if (rp[t] > col) high = t; 512 else low = t; 513 } 514 for ( i=low; i<high; i++ ) { 515 if (rp[i] > col) break; 516 if (rp[i] == col) { 517 bap = ap + bs2*i + bs*cidx + ridx; 518 if (is == ADD_VALUES) *bap += value; 519 else *bap = value; 520 goto noinsert; 521 } 522 } 523 if (nonew) goto noinsert; 524 if (nrow >= rmax) { 525 /* there is no extra room in row, therefore enlarge */ 526 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 527 Scalar *new_a; 528 529 /* malloc new storage space */ 530 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 531 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 532 new_j = (int *) (new_a + bs2*new_nz); 533 new_i = new_j + new_nz; 534 535 /* copy over old data into new slots */ 536 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} 537 for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 538 PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int)); 539 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 540 PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow, 541 len*sizeof(int)); 542 PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(Scalar)); 543 PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 544 PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE), 545 aa+bs2*(ai[row]+nrow),bs2*len*sizeof(Scalar)); 546 /* free up old matrix storage */ 547 PetscFree(a->a); 548 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 549 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 550 a->singlemalloc = 1; 551 552 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 553 rmax = imax[row] = imax[row] + CHUNKSIZE; 554 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 555 a->maxnz += bs2*CHUNKSIZE; 556 a->reallocs++; 557 a->nz++; 558 } 559 N = nrow++ - 1; 560 /* shift up all the later entries in this row */ 561 for ( ii=N; ii>=i; ii-- ) { 562 rp[ii+1] = rp[ii]; 563 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 564 } 565 if (N >= i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 566 rp[i] = col; 567 ap[bs2*i + bs*cidx + ridx] = value; 568 noinsert:; 569 low = i; 570 } 571 ailen[row] = nrow; 572 } 573 return 0; 574 } 575 576 #undef __FUNC__ 577 #define __FUNC__ "MatGetSize_SeqBAIJ" 578 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 579 { 580 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 581 *m = a->m; *n = a->n; 582 return 0; 583 } 584 585 #undef __FUNC__ 586 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ" 587 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 588 { 589 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 590 *m = 0; *n = a->m; 591 return 0; 592 } 593 594 #undef __FUNC__ 595 #define __FUNC__ "MatGetRow_SeqBAIJ" 596 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 597 { 598 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 599 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 600 Scalar *aa,*v_i,*aa_i; 601 602 bs = a->bs; 603 ai = a->i; 604 aj = a->j; 605 aa = a->a; 606 bs2 = a->bs2; 607 608 if (row < 0 || row >= a->m) SETERRQ(1,0,"Row out of range"); 609 610 bn = row/bs; /* Block number */ 611 bp = row % bs; /* Block Position */ 612 M = ai[bn+1] - ai[bn]; 613 *nz = bs*M; 614 615 if (v) { 616 *v = 0; 617 if (*nz) { 618 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 619 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 620 v_i = *v + i*bs; 621 aa_i = aa + bs2*(ai[bn] + i); 622 for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 623 } 624 } 625 } 626 627 if (idx) { 628 *idx = 0; 629 if (*nz) { 630 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 631 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 632 idx_i = *idx + i*bs; 633 itmp = bs*aj[ai[bn] + i]; 634 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 635 } 636 } 637 } 638 return 0; 639 } 640 641 #undef __FUNC__ 642 #define __FUNC__ "MatRestoreRow_SeqBAIJ" 643 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 644 { 645 if (idx) {if (*idx) PetscFree(*idx);} 646 if (v) {if (*v) PetscFree(*v);} 647 return 0; 648 } 649 650 #undef __FUNC__ 651 #define __FUNC__ "MatTranspose_SeqBAIJ" 652 static int MatTranspose_SeqBAIJ(Mat A,Mat *B) 653 { 654 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 655 Mat C; 656 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 657 int *rows,*cols,bs2=a->bs2; 658 Scalar *array=a->a; 659 660 if (B==PETSC_NULL && mbs!=nbs) 661 SETERRQ(1,0,"Square matrix only for in-place"); 662 col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 663 PetscMemzero(col,(1+nbs)*sizeof(int)); 664 665 for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 666 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 667 PetscFree(col); 668 rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 669 cols = rows + bs; 670 for ( i=0; i<mbs; i++ ) { 671 cols[0] = i*bs; 672 for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 673 len = ai[i+1] - ai[i]; 674 for ( j=0; j<len; j++ ) { 675 rows[0] = (*aj++)*bs; 676 for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 677 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 678 array += bs2; 679 } 680 } 681 PetscFree(rows); 682 683 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 684 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 685 686 if (B != PETSC_NULL) { 687 *B = C; 688 } else { 689 /* This isn't really an in-place transpose */ 690 PetscFree(a->a); 691 if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 692 if (a->diag) PetscFree(a->diag); 693 if (a->ilen) PetscFree(a->ilen); 694 if (a->imax) PetscFree(a->imax); 695 if (a->solve_work) PetscFree(a->solve_work); 696 PetscFree(a); 697 PetscMemcpy(A,C,sizeof(struct _Mat)); 698 PetscHeaderDestroy(C); 699 } 700 return 0; 701 } 702 703 704 #undef __FUNC__ 705 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ" 706 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 707 { 708 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 709 int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 710 int m = a->m,*ip, N, *ailen = a->ilen; 711 int mbs = a->mbs, bs2 = a->bs2,rmax; 712 Scalar *aa = a->a, *ap; 713 714 if (mode == MAT_FLUSH_ASSEMBLY) return 0; 715 716 rmax = ailen[0]; 717 for ( i=1; i<mbs; i++ ) { 718 /* move each row back by the amount of empty slots (fshift) before it*/ 719 fshift += imax[i-1] - ailen[i-1]; 720 rmax = PetscMax(rmax,ailen[i]); 721 if (fshift) { 722 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 723 N = ailen[i]; 724 for ( j=0; j<N; j++ ) { 725 ip[j-fshift] = ip[j]; 726 PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 727 } 728 } 729 ai[i] = ai[i-1] + ailen[i-1]; 730 } 731 if (mbs) { 732 fshift += imax[mbs-1] - ailen[mbs-1]; 733 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 734 } 735 /* reset ilen and imax for each row */ 736 for ( i=0; i<mbs; i++ ) { 737 ailen[i] = imax[i] = ai[i+1] - ai[i]; 738 } 739 a->nz = ai[mbs]; 740 741 /* diagonals may have moved, so kill the diagonal pointers */ 742 if (fshift && a->diag) { 743 PetscFree(a->diag); 744 PLogObjectMemory(A,-(m+1)*sizeof(int)); 745 a->diag = 0; 746 } 747 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 748 m,a->n,a->bs,fshift*bs2,a->nz*bs2); 749 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 750 a->reallocs); 751 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 752 a->reallocs = 0; 753 A->info.nz_unneeded = (double)fshift*bs2; 754 755 return 0; 756 } 757 758 #undef __FUNC__ 759 #define __FUNC__ "MatZeroEntries_SeqBAIJ" 760 static int MatZeroEntries_SeqBAIJ(Mat A) 761 { 762 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 763 PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar)); 764 return 0; 765 } 766 767 #undef __FUNC__ 768 #define __FUNC__ "MatDestroy_SeqBAIJ" 769 int MatDestroy_SeqBAIJ(PetscObject obj) 770 { 771 Mat A = (Mat) obj; 772 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 773 int ierr; 774 775 #if defined(PETSC_LOG) 776 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 777 #endif 778 PetscFree(a->a); 779 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 780 if (a->diag) PetscFree(a->diag); 781 if (a->ilen) PetscFree(a->ilen); 782 if (a->imax) PetscFree(a->imax); 783 if (a->solve_work) PetscFree(a->solve_work); 784 if (a->mult_work) PetscFree(a->mult_work); 785 PetscFree(a); 786 if (A->mapping) { 787 ierr = ISLocalToGlobalMappingDestroy(A->mapping); CHKERRQ(ierr); 788 } 789 PLogObjectDestroy(A); 790 PetscHeaderDestroy(A); 791 return 0; 792 } 793 794 #undef __FUNC__ 795 #define __FUNC__ "MatSetOption_SeqBAIJ" 796 static int MatSetOption_SeqBAIJ(Mat A,MatOption op) 797 { 798 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 799 if (op == MAT_ROW_ORIENTED) a->roworiented = 1; 800 else if (op == MAT_COLUMN_ORIENTED) a->roworiented = 0; 801 else if (op == MAT_COLUMNS_SORTED) a->sorted = 1; 802 else if (op == MAT_COLUMNS_UNSORTED) a->sorted = 0; 803 else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 804 else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 805 else if (op == MAT_ROWS_SORTED || 806 op == MAT_ROWS_UNSORTED || 807 op == MAT_SYMMETRIC || 808 op == MAT_STRUCTURALLY_SYMMETRIC || 809 op == MAT_YES_NEW_DIAGONALS || 810 op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) 811 PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 812 else if (op == MAT_NO_NEW_DIAGONALS) 813 {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");} 814 else 815 {SETERRQ(PETSC_ERR_SUP,0,"unknown option");} 816 return 0; 817 } 818 819 820 /* -------------------------------------------------------*/ 821 /* Should check that shapes of vectors and matrices match */ 822 /* -------------------------------------------------------*/ 823 #include "pinclude/plapack.h" 824 825 #undef __FUNC__ 826 #define __FUNC__ "MatMult_SeqBAIJ_1" 827 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz) 828 { 829 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 830 register Scalar *x,*z,*v,sum; 831 int mbs=a->mbs,i,*idx,*ii,n; 832 833 VecGetArray_Fast(xx,x); 834 VecGetArray_Fast(zz,z); 835 836 idx = a->j; 837 v = a->a; 838 ii = a->i; 839 840 for ( i=0; i<mbs; i++ ) { 841 n = ii[1] - ii[0]; ii++; 842 sum = 0.0; 843 while (n--) sum += *v++ * x[*idx++]; 844 z[i] = sum; 845 } 846 VecRestoreArray_Fast(xx,x); 847 VecRestoreArray_Fast(zz,z); 848 PLogFlops(2*a->nz - a->m); 849 return 0; 850 } 851 852 #undef __FUNC__ 853 #define __FUNC__ "MatMult_SeqBAIJ_2" 854 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz) 855 { 856 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 857 register Scalar *x,*z,*v,*xb,sum1,sum2; 858 register Scalar x1,x2; 859 int mbs=a->mbs,i,*idx,*ii; 860 int j,n; 861 862 VecGetArray_Fast(xx,x); 863 VecGetArray_Fast(zz,z); 864 865 idx = a->j; 866 v = a->a; 867 ii = a->i; 868 869 for ( i=0; i<mbs; i++ ) { 870 n = ii[1] - ii[0]; ii++; 871 sum1 = 0.0; sum2 = 0.0; 872 for ( j=0; j<n; j++ ) { 873 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 874 sum1 += v[0]*x1 + v[2]*x2; 875 sum2 += v[1]*x1 + v[3]*x2; 876 v += 4; 877 } 878 z[0] = sum1; z[1] = sum2; 879 z += 2; 880 } 881 VecRestoreArray_Fast(xx,x); 882 VecRestoreArray_Fast(zz,z); 883 PLogFlops(4*a->nz - a->m); 884 return 0; 885 } 886 887 #undef __FUNC__ 888 #define __FUNC__ "MatMult_SeqBAIJ_3" 889 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz) 890 { 891 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 892 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 893 int mbs=a->mbs,i,*idx,*ii,j,n; 894 895 VecGetArray_Fast(xx,x); 896 VecGetArray_Fast(zz,z); 897 898 idx = a->j; 899 v = a->a; 900 ii = a->i; 901 902 for ( i=0; i<mbs; i++ ) { 903 n = ii[1] - ii[0]; ii++; 904 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 905 for ( j=0; j<n; j++ ) { 906 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 907 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 908 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 909 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 910 v += 9; 911 } 912 z[0] = sum1; z[1] = sum2; z[2] = sum3; 913 z += 3; 914 } 915 VecRestoreArray_Fast(xx,x); 916 VecRestoreArray_Fast(zz,z); 917 PLogFlops(18*a->nz - a->m); 918 return 0; 919 } 920 921 #undef __FUNC__ 922 #define __FUNC__ "MatMult_SeqBAIJ_4" 923 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz) 924 { 925 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 926 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4; 927 register Scalar x1,x2,x3,x4; 928 int mbs=a->mbs,i,*idx,*ii; 929 int j,n; 930 931 VecGetArray_Fast(xx,x); 932 VecGetArray_Fast(zz,z); 933 934 idx = a->j; 935 v = a->a; 936 ii = a->i; 937 938 for ( i=0; i<mbs; i++ ) { 939 n = ii[1] - ii[0]; ii++; 940 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 941 for ( j=0; j<n; j++ ) { 942 xb = x + 4*(*idx++); 943 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 944 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 945 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 946 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 947 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 948 v += 16; 949 } 950 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 951 z += 4; 952 } 953 VecRestoreArray_Fast(xx,x); 954 VecRestoreArray_Fast(zz,z); 955 PLogFlops(32*a->nz - a->m); 956 return 0; 957 } 958 959 #undef __FUNC__ 960 #define __FUNC__ "MatMult_SeqBAIJ_5" 961 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz) 962 { 963 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 964 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 965 register Scalar x1,x2,x3,x4,x5; 966 int mbs=a->mbs,i,*idx,*ii,j,n; 967 968 VecGetArray_Fast(xx,x); 969 VecGetArray_Fast(zz,z); 970 971 idx = a->j; 972 v = a->a; 973 ii = a->i; 974 975 for ( i=0; i<mbs; i++ ) { 976 n = ii[1] - ii[0]; ii++; 977 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 978 for ( j=0; j<n; j++ ) { 979 xb = x + 5*(*idx++); 980 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 981 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 982 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 983 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 984 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 985 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 986 v += 25; 987 } 988 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 989 z += 5; 990 } 991 VecRestoreArray_Fast(xx,x); 992 VecRestoreArray_Fast(zz,z); 993 PLogFlops(50*a->nz - a->m); 994 return 0; 995 } 996 997 #undef __FUNC__ 998 #define __FUNC__ "MatMult_SeqBAIJ_N" 999 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz) 1000 { 1001 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1002 register Scalar *x,*z,*v,*xb; 1003 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2; 1004 int _One = 1,ncols,k; 1005 Scalar _DOne = 1.0,*work,*workt,_DZero = 0.0; 1006 1007 VecGetArray_Fast(xx,x); 1008 VecGetArray_Fast(zz,z); 1009 1010 idx = a->j; 1011 v = a->a; 1012 ii = a->i; 1013 1014 1015 if (!a->mult_work) { 1016 k = PetscMax(a->m,a->n); 1017 a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work); 1018 } 1019 work = a->mult_work; 1020 for ( i=0; i<mbs; i++ ) { 1021 n = ii[1] - ii[0]; ii++; 1022 ncols = n*bs; 1023 workt = work; 1024 for ( j=0; j<n; j++ ) { 1025 xb = x + bs*(*idx++); 1026 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1027 workt += bs; 1028 } 1029 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); 1030 v += n*bs2; 1031 z += bs; 1032 } 1033 VecRestoreArray_Fast(xx,x); 1034 VecRestoreArray_Fast(zz,z); 1035 PLogFlops(2*a->nz*bs2 - a->m); 1036 return 0; 1037 } 1038 1039 #undef __FUNC__ 1040 #define __FUNC__ "MatMultAdd_SeqBAIJ_1" 1041 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 1042 { 1043 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1044 register Scalar *x,*y,*z,*v,sum; 1045 int mbs=a->mbs,i,*idx,*ii,n; 1046 1047 VecGetArray_Fast(xx,x); 1048 VecGetArray_Fast(yy,y); 1049 VecGetArray_Fast(zz,z); 1050 1051 idx = a->j; 1052 v = a->a; 1053 ii = a->i; 1054 1055 for ( i=0; i<mbs; i++ ) { 1056 n = ii[1] - ii[0]; ii++; 1057 sum = y[i]; 1058 while (n--) sum += *v++ * x[*idx++]; 1059 z[i] = sum; 1060 } 1061 VecRestoreArray_Fast(xx,x); 1062 VecRestoreArray_Fast(yy,y); 1063 VecRestoreArray_Fast(zz,z); 1064 PLogFlops(2*a->nz); 1065 return 0; 1066 } 1067 1068 #undef __FUNC__ 1069 #define __FUNC__ "MatMultAdd_SeqBAIJ_2" 1070 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 1071 { 1072 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1073 register Scalar *x,*y,*z,*v,*xb,sum1,sum2; 1074 register Scalar x1,x2; 1075 int mbs=a->mbs,i,*idx,*ii; 1076 int j,n; 1077 1078 VecGetArray_Fast(xx,x); 1079 VecGetArray_Fast(yy,y); 1080 VecGetArray_Fast(zz,z); 1081 1082 idx = a->j; 1083 v = a->a; 1084 ii = a->i; 1085 1086 for ( i=0; i<mbs; i++ ) { 1087 n = ii[1] - ii[0]; ii++; 1088 sum1 = y[0]; sum2 = y[1]; 1089 for ( j=0; j<n; j++ ) { 1090 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 1091 sum1 += v[0]*x1 + v[2]*x2; 1092 sum2 += v[1]*x1 + v[3]*x2; 1093 v += 4; 1094 } 1095 z[0] = sum1; z[1] = sum2; 1096 z += 2; y += 2; 1097 } 1098 VecRestoreArray_Fast(xx,x); 1099 VecRestoreArray_Fast(yy,y); 1100 VecRestoreArray_Fast(zz,z); 1101 PLogFlops(4*a->nz); 1102 return 0; 1103 } 1104 1105 #undef __FUNC__ 1106 #define __FUNC__ "MatMultAdd_SeqBAIJ_3" 1107 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 1108 { 1109 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1110 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 1111 int mbs=a->mbs,i,*idx,*ii,j,n; 1112 1113 VecGetArray_Fast(xx,x); 1114 VecGetArray_Fast(yy,y); 1115 VecGetArray_Fast(zz,z); 1116 1117 idx = a->j; 1118 v = a->a; 1119 ii = a->i; 1120 1121 for ( i=0; i<mbs; i++ ) { 1122 n = ii[1] - ii[0]; ii++; 1123 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; 1124 for ( j=0; j<n; j++ ) { 1125 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1126 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 1127 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 1128 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 1129 v += 9; 1130 } 1131 z[0] = sum1; z[1] = sum2; z[2] = sum3; 1132 z += 3; y += 3; 1133 } 1134 VecRestoreArray_Fast(xx,x); 1135 VecRestoreArray_Fast(yy,y); 1136 VecRestoreArray_Fast(zz,z); 1137 PLogFlops(18*a->nz); 1138 return 0; 1139 } 1140 1141 #undef __FUNC__ 1142 #define __FUNC__ "MatMultAdd_SeqBAIJ_4" 1143 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 1144 { 1145 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1146 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4; 1147 register Scalar x1,x2,x3,x4; 1148 int mbs=a->mbs,i,*idx,*ii; 1149 int j,n; 1150 1151 VecGetArray_Fast(xx,x); 1152 VecGetArray_Fast(yy,y); 1153 VecGetArray_Fast(zz,z); 1154 1155 idx = a->j; 1156 v = a->a; 1157 ii = a->i; 1158 1159 for ( i=0; i<mbs; i++ ) { 1160 n = ii[1] - ii[0]; ii++; 1161 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; 1162 for ( j=0; j<n; j++ ) { 1163 xb = x + 4*(*idx++); 1164 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1165 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 1166 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 1167 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 1168 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 1169 v += 16; 1170 } 1171 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 1172 z += 4; y += 4; 1173 } 1174 VecRestoreArray_Fast(xx,x); 1175 VecRestoreArray_Fast(yy,y); 1176 VecRestoreArray_Fast(zz,z); 1177 PLogFlops(32*a->nz); 1178 return 0; 1179 } 1180 1181 #undef __FUNC__ 1182 #define __FUNC__ "MatMultAdd_SeqBAIJ_5" 1183 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 1184 { 1185 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1186 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 1187 register Scalar x1,x2,x3,x4,x5; 1188 int mbs=a->mbs,i,*idx,*ii,j,n; 1189 1190 VecGetArray_Fast(xx,x); 1191 VecGetArray_Fast(yy,y); 1192 VecGetArray_Fast(zz,z); 1193 1194 idx = a->j; 1195 v = a->a; 1196 ii = a->i; 1197 1198 for ( i=0; i<mbs; i++ ) { 1199 n = ii[1] - ii[0]; ii++; 1200 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; 1201 for ( j=0; j<n; j++ ) { 1202 xb = x + 5*(*idx++); 1203 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 1204 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1205 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1206 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1207 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1208 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 1209 v += 25; 1210 } 1211 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 1212 z += 5; y += 5; 1213 } 1214 VecRestoreArray_Fast(xx,x); 1215 VecRestoreArray_Fast(yy,y); 1216 VecRestoreArray_Fast(zz,z); 1217 PLogFlops(50*a->nz); 1218 return 0; 1219 } 1220 1221 #undef __FUNC__ 1222 #define __FUNC__ "MatMultAdd_SeqBAIJ_N" 1223 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 1224 { 1225 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1226 register Scalar *x,*z,*v,*xb; 1227 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr; 1228 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1229 1230 if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1231 1232 VecGetArray_Fast(xx,x); 1233 VecGetArray_Fast(zz,z); 1234 1235 idx = a->j; 1236 v = a->a; 1237 ii = a->i; 1238 1239 1240 if (!a->mult_work) { 1241 k = PetscMax(a->m,a->n); 1242 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work); 1243 } 1244 work = a->mult_work; 1245 for ( i=0; i<mbs; i++ ) { 1246 n = ii[1] - ii[0]; ii++; 1247 ncols = n*bs; 1248 workt = work; 1249 for ( j=0; j<n; j++ ) { 1250 xb = x + bs*(*idx++); 1251 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1252 workt += bs; 1253 } 1254 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); 1255 v += n*bs2; 1256 z += bs; 1257 } 1258 VecRestoreArray_Fast(xx,x); 1259 VecRestoreArray_Fast(zz,z); 1260 PLogFlops(2*a->nz*bs2 ); 1261 return 0; 1262 } 1263 1264 #undef __FUNC__ 1265 #define __FUNC__ "MatMultTrans_SeqBAIJ" 1266 static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz) 1267 { 1268 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1269 Scalar *xg,*zg,*zb; 1270 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1271 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1272 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1273 1274 1275 VecGetArray_Fast(xx,xg); x = xg; 1276 VecGetArray_Fast(zz,zg); z = zg; 1277 PetscMemzero(z,N*sizeof(Scalar)); 1278 1279 idx = a->j; 1280 v = a->a; 1281 ii = a->i; 1282 1283 switch (bs) { 1284 case 1: 1285 for ( i=0; i<mbs; i++ ) { 1286 n = ii[1] - ii[0]; ii++; 1287 xb = x + i; x1 = xb[0]; 1288 ib = idx + ai[i]; 1289 for ( j=0; j<n; j++ ) { 1290 rval = ib[j]; 1291 z[rval] += *v++ * x1; 1292 } 1293 } 1294 break; 1295 case 2: 1296 for ( i=0; i<mbs; i++ ) { 1297 n = ii[1] - ii[0]; ii++; 1298 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1299 ib = idx + ai[i]; 1300 for ( j=0; j<n; j++ ) { 1301 rval = ib[j]*2; 1302 z[rval++] += v[0]*x1 + v[1]*x2; 1303 z[rval++] += v[2]*x1 + v[3]*x2; 1304 v += 4; 1305 } 1306 } 1307 break; 1308 case 3: 1309 for ( i=0; i<mbs; i++ ) { 1310 n = ii[1] - ii[0]; ii++; 1311 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1312 ib = idx + ai[i]; 1313 for ( j=0; j<n; j++ ) { 1314 rval = ib[j]*3; 1315 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1316 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1317 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1318 v += 9; 1319 } 1320 } 1321 break; 1322 case 4: 1323 for ( i=0; i<mbs; i++ ) { 1324 n = ii[1] - ii[0]; ii++; 1325 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1326 ib = idx + ai[i]; 1327 for ( j=0; j<n; j++ ) { 1328 rval = ib[j]*4; 1329 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1330 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1331 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1332 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1333 v += 16; 1334 } 1335 } 1336 break; 1337 case 5: 1338 for ( i=0; i<mbs; i++ ) { 1339 n = ii[1] - ii[0]; ii++; 1340 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1341 x4 = xb[3]; x5 = xb[4]; 1342 ib = idx + ai[i]; 1343 for ( j=0; j<n; j++ ) { 1344 rval = ib[j]*5; 1345 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1346 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1347 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1348 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1349 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1350 v += 25; 1351 } 1352 } 1353 break; 1354 /* block sizes larger then 5 by 5 are handled by BLAS */ 1355 default: { 1356 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1357 if (!a->mult_work) { 1358 k = PetscMax(a->m,a->n); 1359 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1360 CHKPTRQ(a->mult_work); 1361 } 1362 work = a->mult_work; 1363 for ( i=0; i<mbs; i++ ) { 1364 n = ii[1] - ii[0]; ii++; 1365 ncols = n*bs; 1366 PetscMemzero(work,ncols*sizeof(Scalar)); 1367 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1368 v += n*bs2; 1369 x += bs; 1370 workt = work; 1371 for ( j=0; j<n; j++ ) { 1372 zb = z + bs*(*idx++); 1373 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1374 workt += bs; 1375 } 1376 } 1377 } 1378 } 1379 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1380 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1381 PLogFlops(2*a->nz*a->bs2 - a->n); 1382 return 0; 1383 } 1384 1385 #undef __FUNC__ 1386 #define __FUNC__ "MatMultTransAdd_SeqBAIJ" 1387 static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1388 { 1389 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1390 Scalar *xg,*zg,*zb; 1391 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1392 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1393 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1394 1395 1396 1397 VecGetArray_Fast(xx,xg); x = xg; 1398 VecGetArray_Fast(zz,zg); z = zg; 1399 1400 if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1401 else PetscMemzero(z,N*sizeof(Scalar)); 1402 1403 1404 idx = a->j; 1405 v = a->a; 1406 ii = a->i; 1407 1408 switch (bs) { 1409 case 1: 1410 for ( i=0; i<mbs; i++ ) { 1411 n = ii[1] - ii[0]; ii++; 1412 xb = x + i; x1 = xb[0]; 1413 ib = idx + ai[i]; 1414 for ( j=0; j<n; j++ ) { 1415 rval = ib[j]; 1416 z[rval] += *v++ * x1; 1417 } 1418 } 1419 break; 1420 case 2: 1421 for ( i=0; i<mbs; i++ ) { 1422 n = ii[1] - ii[0]; ii++; 1423 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1424 ib = idx + ai[i]; 1425 for ( j=0; j<n; j++ ) { 1426 rval = ib[j]*2; 1427 z[rval++] += v[0]*x1 + v[1]*x2; 1428 z[rval++] += v[2]*x1 + v[3]*x2; 1429 v += 4; 1430 } 1431 } 1432 break; 1433 case 3: 1434 for ( i=0; i<mbs; i++ ) { 1435 n = ii[1] - ii[0]; ii++; 1436 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1437 ib = idx + ai[i]; 1438 for ( j=0; j<n; j++ ) { 1439 rval = ib[j]*3; 1440 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1441 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1442 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1443 v += 9; 1444 } 1445 } 1446 break; 1447 case 4: 1448 for ( i=0; i<mbs; i++ ) { 1449 n = ii[1] - ii[0]; ii++; 1450 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1451 ib = idx + ai[i]; 1452 for ( j=0; j<n; j++ ) { 1453 rval = ib[j]*4; 1454 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1455 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1456 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1457 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1458 v += 16; 1459 } 1460 } 1461 break; 1462 case 5: 1463 for ( i=0; i<mbs; i++ ) { 1464 n = ii[1] - ii[0]; ii++; 1465 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1466 x4 = xb[3]; x5 = xb[4]; 1467 ib = idx + ai[i]; 1468 for ( j=0; j<n; j++ ) { 1469 rval = ib[j]*5; 1470 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1471 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1472 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1473 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1474 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1475 v += 25; 1476 } 1477 } 1478 break; 1479 /* block sizes larger then 5 by 5 are handled by BLAS */ 1480 default: { 1481 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1482 if (!a->mult_work) { 1483 k = PetscMax(a->m,a->n); 1484 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1485 CHKPTRQ(a->mult_work); 1486 } 1487 work = a->mult_work; 1488 for ( i=0; i<mbs; i++ ) { 1489 n = ii[1] - ii[0]; ii++; 1490 ncols = n*bs; 1491 PetscMemzero(work,ncols*sizeof(Scalar)); 1492 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1493 v += n*bs2; 1494 x += bs; 1495 workt = work; 1496 for ( j=0; j<n; j++ ) { 1497 zb = z + bs*(*idx++); 1498 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1499 workt += bs; 1500 } 1501 } 1502 } 1503 } 1504 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1505 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1506 PLogFlops(2*a->nz*a->bs2); 1507 return 0; 1508 } 1509 1510 #undef __FUNC__ 1511 #define __FUNC__ "MatGetInfo_SeqBAIJ" 1512 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info) 1513 { 1514 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1515 1516 info->rows_global = (double)a->m; 1517 info->columns_global = (double)a->n; 1518 info->rows_local = (double)a->m; 1519 info->columns_local = (double)a->n; 1520 info->block_size = a->bs2; 1521 info->nz_allocated = a->maxnz; 1522 info->nz_used = a->bs2*a->nz; 1523 info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 1524 /* if (info->nz_unneeded != A->info.nz_unneeded) 1525 printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */ 1526 info->assemblies = A->num_ass; 1527 info->mallocs = a->reallocs; 1528 info->memory = A->mem; 1529 if (A->factor) { 1530 info->fill_ratio_given = A->info.fill_ratio_given; 1531 info->fill_ratio_needed = A->info.fill_ratio_needed; 1532 info->factor_mallocs = A->info.factor_mallocs; 1533 } else { 1534 info->fill_ratio_given = 0; 1535 info->fill_ratio_needed = 0; 1536 info->factor_mallocs = 0; 1537 } 1538 return 0; 1539 } 1540 1541 #undef __FUNC__ 1542 #define __FUNC__ "MatEqual_SeqBAIJ" 1543 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 1544 { 1545 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 1546 1547 if (B->type !=MATSEQBAIJ)SETERRQ(1,0,"Matrices must be same type"); 1548 1549 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1550 if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 1551 (a->nz != b->nz)) { 1552 *flg = PETSC_FALSE; return 0; 1553 } 1554 1555 /* if the a->i are the same */ 1556 if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 1557 *flg = PETSC_FALSE; return 0; 1558 } 1559 1560 /* if a->j are the same */ 1561 if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 1562 *flg = PETSC_FALSE; return 0; 1563 } 1564 1565 /* if a->a are the same */ 1566 if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 1567 *flg = PETSC_FALSE; return 0; 1568 } 1569 *flg = PETSC_TRUE; 1570 return 0; 1571 1572 } 1573 1574 #undef __FUNC__ 1575 #define __FUNC__ "MatGetDiagonal_SeqBAIJ" 1576 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 1577 { 1578 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1579 int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 1580 Scalar *x, zero = 0.0,*aa,*aa_j; 1581 1582 if (A->factor) SETERRQ(1,0,"Not for factored matrix"); 1583 bs = a->bs; 1584 aa = a->a; 1585 ai = a->i; 1586 aj = a->j; 1587 ambs = a->mbs; 1588 bs2 = a->bs2; 1589 1590 VecSet(&zero,v); 1591 VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n); 1592 if (n != a->m) SETERRQ(1,0,"Nonconforming matrix and vector"); 1593 for ( i=0; i<ambs; i++ ) { 1594 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1595 if (aj[j] == i) { 1596 row = i*bs; 1597 aa_j = aa+j*bs2; 1598 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 1599 break; 1600 } 1601 } 1602 } 1603 return 0; 1604 } 1605 1606 #undef __FUNC__ 1607 #define __FUNC__ "MatDiagonalScale_SeqBAIJ" 1608 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 1609 { 1610 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1611 Scalar *l,*r,x,*v,*aa,*li,*ri; 1612 int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 1613 1614 ai = a->i; 1615 aj = a->j; 1616 aa = a->a; 1617 m = a->m; 1618 n = a->n; 1619 bs = a->bs; 1620 mbs = a->mbs; 1621 bs2 = a->bs2; 1622 if (ll) { 1623 VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm); 1624 if (lm != m) SETERRQ(1,0,"Left scaling vector wrong length"); 1625 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1626 M = ai[i+1] - ai[i]; 1627 li = l + i*bs; 1628 v = aa + bs2*ai[i]; 1629 for ( j=0; j<M; j++ ) { /* for each block */ 1630 for ( k=0; k<bs2; k++ ) { 1631 (*v++) *= li[k%bs]; 1632 } 1633 } 1634 } 1635 } 1636 1637 if (rr) { 1638 VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn); 1639 if (rn != n) SETERRQ(1,0,"Right scaling vector wrong length"); 1640 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1641 M = ai[i+1] - ai[i]; 1642 v = aa + bs2*ai[i]; 1643 for ( j=0; j<M; j++ ) { /* for each block */ 1644 ri = r + bs*aj[ai[i]+j]; 1645 for ( k=0; k<bs; k++ ) { 1646 x = ri[k]; 1647 for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 1648 } 1649 } 1650 } 1651 } 1652 return 0; 1653 } 1654 1655 1656 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 1657 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 1658 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1659 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*); 1660 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**); 1661 1662 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 1663 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 1664 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 1665 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 1666 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 1667 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 1668 1669 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 1670 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 1671 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 1672 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 1673 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 1674 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 1675 1676 #undef __FUNC__ 1677 #define __FUNC__ "MatNorm_SeqBAIJ" 1678 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 1679 { 1680 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1681 Scalar *v = a->a; 1682 double sum = 0.0; 1683 int i,nz=a->nz,bs2=a->bs2; 1684 1685 if (type == NORM_FROBENIUS) { 1686 for (i=0; i< bs2*nz; i++ ) { 1687 #if defined(PETSC_COMPLEX) 1688 sum += real(conj(*v)*(*v)); v++; 1689 #else 1690 sum += (*v)*(*v); v++; 1691 #endif 1692 } 1693 *norm = sqrt(sum); 1694 } 1695 else { 1696 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 1697 } 1698 return 0; 1699 } 1700 1701 /* 1702 note: This can only work for identity for row and col. It would 1703 be good to check this and otherwise generate an error. 1704 */ 1705 #undef __FUNC__ 1706 #define __FUNC__ "MatILUFactor_SeqBAIJ" 1707 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 1708 { 1709 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1710 Mat outA; 1711 int ierr; 1712 1713 if (fill != 0) SETERRQ(1,0,"Only fill=0 supported"); 1714 1715 outA = inA; 1716 inA->factor = FACTOR_LU; 1717 a->row = row; 1718 a->col = col; 1719 1720 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1721 1722 if (!a->diag) { 1723 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 1724 } 1725 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 1726 return 0; 1727 } 1728 1729 #undef __FUNC__ 1730 #define __FUNC__ "MatScale_SeqBAIJ" 1731 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 1732 { 1733 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1734 int one = 1, totalnz = a->bs2*a->nz; 1735 BLscal_( &totalnz, alpha, a->a, &one ); 1736 PLogFlops(totalnz); 1737 return 0; 1738 } 1739 1740 #undef __FUNC__ 1741 #define __FUNC__ "MatGetValues_SeqBAIJ" 1742 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 1743 { 1744 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1745 int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 1746 int *ai = a->i, *ailen = a->ilen; 1747 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 1748 Scalar *ap, *aa = a->a, zero = 0.0; 1749 1750 for ( k=0; k<m; k++ ) { /* loop over rows */ 1751 row = im[k]; brow = row/bs; 1752 if (row < 0) SETERRQ(1,0,"Negative row"); 1753 if (row >= a->m) SETERRQ(1,0,"Row too large"); 1754 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1755 nrow = ailen[brow]; 1756 for ( l=0; l<n; l++ ) { /* loop over columns */ 1757 if (in[l] < 0) SETERRQ(1,0,"Negative column"); 1758 if (in[l] >= a->n) SETERRQ(1,0,"Column too large"); 1759 col = in[l] ; 1760 bcol = col/bs; 1761 cidx = col%bs; 1762 ridx = row%bs; 1763 high = nrow; 1764 low = 0; /* assume unsorted */ 1765 while (high-low > 5) { 1766 t = (low+high)/2; 1767 if (rp[t] > bcol) high = t; 1768 else low = t; 1769 } 1770 for ( i=low; i<high; i++ ) { 1771 if (rp[i] > bcol) break; 1772 if (rp[i] == bcol) { 1773 *v++ = ap[bs2*i+bs*cidx+ridx]; 1774 goto finished; 1775 } 1776 } 1777 *v++ = zero; 1778 finished:; 1779 } 1780 } 1781 return 0; 1782 } 1783 1784 #undef __FUNC__ 1785 #define __FUNC__ "MatGetBlockSize_SeqBAIJ" 1786 static int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs) 1787 { 1788 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data; 1789 *bs = baij->bs; 1790 return 0; 1791 } 1792 1793 /* idx should be of length atlease bs */ 1794 #undef __FUNC__ 1795 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Block" 1796 static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg) 1797 { 1798 int i,row; 1799 row = idx[0]; 1800 if (row%bs!=0) { *flg = PETSC_FALSE; return 0; } 1801 1802 for ( i=1; i<bs; i++ ) { 1803 if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; } 1804 } 1805 *flg = PETSC_TRUE; 1806 return 0; 1807 } 1808 1809 #undef __FUNC__ 1810 #define __FUNC__ "MatZeroRows_SeqBAIJ" 1811 static int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag) 1812 { 1813 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1814 IS is_local; 1815 int ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2; 1816 PetscTruth flg; 1817 Scalar zero = 0.0,*aa; 1818 1819 /* Make a copy of the IS and sort it */ 1820 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 1821 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1822 ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr); 1823 ierr = ISSort(is_local); CHKERRQ(ierr); 1824 ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr); 1825 1826 i = 0; 1827 while (i < is_n) { 1828 if (rows[i]<0 || rows[i]>m) SETERRQ(1,0,"row out of range"); 1829 flg = PETSC_FALSE; 1830 if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); } 1831 if (flg) { /* There exists a block of rows to be Zerowed */ 1832 baij->ilen[rows[i]/bs] = 0; 1833 i += bs; 1834 } else { /* Zero out only the requested row */ 1835 count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs; 1836 aa = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs); 1837 for ( j=0; j<count; j++ ) { 1838 aa[0] = zero; 1839 aa+=bs; 1840 } 1841 i++; 1842 } 1843 } 1844 if (diag) { 1845 for ( j=0; j<is_n; j++ ) { 1846 ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1847 } 1848 } 1849 ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr); 1850 ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr); 1851 ierr = ISDestroy(is_local); CHKERRQ(ierr); 1852 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1853 1854 return 0; 1855 } 1856 1857 #undef __FUNC__ 1858 #define __FUNC__ "MatPrintHelp_SeqBAIJ" 1859 int MatPrintHelp_SeqBAIJ(Mat A) 1860 { 1861 static int called = 0; 1862 MPI_Comm comm = A->comm; 1863 1864 if (called) return 0; else called = 1; 1865 PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n"); 1866 PetscPrintf(comm," -mat_block_size <block_size>\n"); 1867 return 0; 1868 } 1869 1870 /* -------------------------------------------------------------------*/ 1871 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 1872 MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 1873 MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N, 1874 MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ, 1875 MatSolve_SeqBAIJ_N,0, 1876 0,0, 1877 MatLUFactor_SeqBAIJ,0, 1878 0, 1879 MatTranspose_SeqBAIJ, 1880 MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 1881 MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 1882 0,MatAssemblyEnd_SeqBAIJ, 1883 0, 1884 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ, 1885 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 1886 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 1887 MatILUFactorSymbolic_SeqBAIJ,0, 1888 0,0, 1889 MatConvertSameType_SeqBAIJ,0,0, 1890 MatILUFactor_SeqBAIJ,0,0, 1891 MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ, 1892 MatGetValues_SeqBAIJ,0, 1893 MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 1894 0,0,0,MatGetBlockSize_SeqBAIJ, 1895 MatGetRowIJ_SeqBAIJ, 1896 MatRestoreRowIJ_SeqBAIJ, 1897 0,0,0,0,0,0, 1898 MatSetValuesBlocked_SeqBAIJ}; 1899 1900 #undef __FUNC__ 1901 #define __FUNC__ "MatCreateSeqBAIJ" 1902 /*@C 1903 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1904 compressed row) format. For good matrix assembly performance the 1905 user should preallocate the matrix storage by setting the parameter nz 1906 (or the array nzz). By setting these parameters accurately, performance 1907 during matrix assembly can be increased by more than a factor of 50. 1908 1909 Input Parameters: 1910 . comm - MPI communicator, set to MPI_COMM_SELF 1911 . bs - size of block 1912 . m - number of rows 1913 . n - number of columns 1914 . nz - number of block nonzeros per block row (same for all rows) 1915 . nzz - array containing the number of block nonzeros in the various block rows 1916 (possibly different for each block row) or PETSC_NULL 1917 1918 Output Parameter: 1919 . A - the matrix 1920 1921 Options Database Keys: 1922 $ -mat_no_unroll - uses code that does not unroll the loops in the 1923 $ block calculations (much solver) 1924 $ -mat_block_size - size of the blocks to use 1925 1926 Notes: 1927 The block AIJ format is fully compatible with standard Fortran 77 1928 storage. That is, the stored row and column indices can begin at 1929 either one (as in Fortran) or zero. See the users' manual for details. 1930 1931 Specify the preallocated storage with either nz or nnz (not both). 1932 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1933 allocation. For additional details, see the users manual chapter on 1934 matrices. 1935 1936 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 1937 @*/ 1938 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 1939 { 1940 Mat B; 1941 Mat_SeqBAIJ *b; 1942 int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size; 1943 1944 MPI_Comm_size(comm,&size); 1945 if (size > 1) SETERRQ(1,0,"Comm must be of size 1"); 1946 1947 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr); 1948 1949 if (mbs*bs!=m || nbs*bs!=n) 1950 SETERRQ(1,0,"Number rows, cols must be divisible by blocksize"); 1951 1952 *A = 0; 1953 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 1954 PLogObjectCreate(B); 1955 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 1956 PetscMemzero(b,sizeof(Mat_SeqBAIJ)); 1957 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 1958 ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr); 1959 if (!flg) { 1960 switch (bs) { 1961 case 1: 1962 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1963 B->ops.solve = MatSolve_SeqBAIJ_1; 1964 B->ops.mult = MatMult_SeqBAIJ_1; 1965 B->ops.multadd = MatMultAdd_SeqBAIJ_1; 1966 break; 1967 case 2: 1968 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1969 B->ops.solve = MatSolve_SeqBAIJ_2; 1970 B->ops.mult = MatMult_SeqBAIJ_2; 1971 B->ops.multadd = MatMultAdd_SeqBAIJ_2; 1972 break; 1973 case 3: 1974 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1975 B->ops.solve = MatSolve_SeqBAIJ_3; 1976 B->ops.mult = MatMult_SeqBAIJ_3; 1977 B->ops.multadd = MatMultAdd_SeqBAIJ_3; 1978 break; 1979 case 4: 1980 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1981 B->ops.solve = MatSolve_SeqBAIJ_4; 1982 B->ops.mult = MatMult_SeqBAIJ_4; 1983 B->ops.multadd = MatMultAdd_SeqBAIJ_4; 1984 break; 1985 case 5: 1986 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1987 B->ops.solve = MatSolve_SeqBAIJ_5; 1988 B->ops.mult = MatMult_SeqBAIJ_5; 1989 B->ops.multadd = MatMultAdd_SeqBAIJ_5; 1990 break; 1991 } 1992 } 1993 B->destroy = MatDestroy_SeqBAIJ; 1994 B->view = MatView_SeqBAIJ; 1995 B->factor = 0; 1996 B->lupivotthreshold = 1.0; 1997 B->mapping = 0; 1998 b->row = 0; 1999 b->col = 0; 2000 b->reallocs = 0; 2001 2002 b->m = m; B->m = m; B->M = m; 2003 b->n = n; B->n = n; B->N = n; 2004 b->mbs = mbs; 2005 b->nbs = nbs; 2006 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 2007 if (nnz == PETSC_NULL) { 2008 if (nz == PETSC_DEFAULT) nz = 5; 2009 else if (nz <= 0) nz = 1; 2010 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 2011 nz = nz*mbs; 2012 } 2013 else { 2014 nz = 0; 2015 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 2016 } 2017 2018 /* allocate the matrix space */ 2019 len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 2020 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 2021 PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 2022 b->j = (int *) (b->a + nz*bs2); 2023 PetscMemzero(b->j,nz*sizeof(int)); 2024 b->i = b->j + nz; 2025 b->singlemalloc = 1; 2026 2027 b->i[0] = 0; 2028 for (i=1; i<mbs+1; i++) { 2029 b->i[i] = b->i[i-1] + b->imax[i-1]; 2030 } 2031 2032 /* b->ilen will count nonzeros in each block row so far. */ 2033 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 2034 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 2035 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 2036 2037 b->bs = bs; 2038 b->bs2 = bs2; 2039 b->mbs = mbs; 2040 b->nz = 0; 2041 b->maxnz = nz*bs2; 2042 b->sorted = 0; 2043 b->roworiented = 1; 2044 b->nonew = 0; 2045 b->diag = 0; 2046 b->solve_work = 0; 2047 b->mult_work = 0; 2048 b->spptr = 0; 2049 B->info.nz_unneeded = (double)b->maxnz; 2050 2051 *A = B; 2052 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 2053 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 2054 return 0; 2055 } 2056 2057 #undef __FUNC__ 2058 #define __FUNC__ "MatConvertSameType_SeqBAIJ" 2059 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 2060 { 2061 Mat C; 2062 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 2063 int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 2064 2065 if (a->i[mbs] != nz) SETERRQ(1,0,"Corrupt matrix"); 2066 2067 *B = 0; 2068 PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 2069 PLogObjectCreate(C); 2070 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 2071 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 2072 C->destroy = MatDestroy_SeqBAIJ; 2073 C->view = MatView_SeqBAIJ; 2074 C->factor = A->factor; 2075 c->row = 0; 2076 c->col = 0; 2077 C->assembled = PETSC_TRUE; 2078 2079 c->m = C->m = a->m; 2080 c->n = C->n = a->n; 2081 C->M = a->m; 2082 C->N = a->n; 2083 2084 c->bs = a->bs; 2085 c->bs2 = a->bs2; 2086 c->mbs = a->mbs; 2087 c->nbs = a->nbs; 2088 2089 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 2090 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 2091 for ( i=0; i<mbs; i++ ) { 2092 c->imax[i] = a->imax[i]; 2093 c->ilen[i] = a->ilen[i]; 2094 } 2095 2096 /* allocate the matrix space */ 2097 c->singlemalloc = 1; 2098 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 2099 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 2100 c->j = (int *) (c->a + nz*bs2); 2101 c->i = c->j + nz; 2102 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 2103 if (mbs > 0) { 2104 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 2105 if (cpvalues == COPY_VALUES) { 2106 PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 2107 } 2108 } 2109 2110 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 2111 c->sorted = a->sorted; 2112 c->roworiented = a->roworiented; 2113 c->nonew = a->nonew; 2114 2115 if (a->diag) { 2116 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 2117 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 2118 for ( i=0; i<mbs; i++ ) { 2119 c->diag[i] = a->diag[i]; 2120 } 2121 } 2122 else c->diag = 0; 2123 c->nz = a->nz; 2124 c->maxnz = a->maxnz; 2125 c->solve_work = 0; 2126 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 2127 c->mult_work = 0; 2128 *B = C; 2129 return 0; 2130 } 2131 2132 #undef __FUNC__ 2133 #define __FUNC__ "MatLoad_SeqBAIJ" 2134 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 2135 { 2136 Mat_SeqBAIJ *a; 2137 Mat B; 2138 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 2139 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2140 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 2141 int *masked, nmask,tmp,bs2,ishift; 2142 Scalar *aa; 2143 MPI_Comm comm = ((PetscObject) viewer)->comm; 2144 2145 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2146 bs2 = bs*bs; 2147 2148 MPI_Comm_size(comm,&size); 2149 if (size > 1) SETERRQ(1,0,"view must have one processor"); 2150 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2151 ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 2152 if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not Mat object"); 2153 M = header[1]; N = header[2]; nz = header[3]; 2154 2155 if (M != N) SETERRQ(1,0,"Can only do square matrices"); 2156 2157 /* 2158 This code adds extra rows to make sure the number of rows is 2159 divisible by the blocksize 2160 */ 2161 mbs = M/bs; 2162 extra_rows = bs - M + bs*(mbs); 2163 if (extra_rows == bs) extra_rows = 0; 2164 else mbs++; 2165 if (extra_rows) { 2166 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 2167 } 2168 2169 /* read in row lengths */ 2170 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 2171 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 2172 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 2173 2174 /* read in column indices */ 2175 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 2176 ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 2177 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 2178 2179 /* loop over row lengths determining block row lengths */ 2180 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 2181 PetscMemzero(browlengths,mbs*sizeof(int)); 2182 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 2183 PetscMemzero(mask,mbs*sizeof(int)); 2184 masked = mask + mbs; 2185 rowcount = 0; nzcount = 0; 2186 for ( i=0; i<mbs; i++ ) { 2187 nmask = 0; 2188 for ( j=0; j<bs; j++ ) { 2189 kmax = rowlengths[rowcount]; 2190 for ( k=0; k<kmax; k++ ) { 2191 tmp = jj[nzcount++]/bs; 2192 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2193 } 2194 rowcount++; 2195 } 2196 browlengths[i] += nmask; 2197 /* zero out the mask elements we set */ 2198 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2199 } 2200 2201 /* create our matrix */ 2202 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 2203 CHKERRQ(ierr); 2204 B = *A; 2205 a = (Mat_SeqBAIJ *) B->data; 2206 2207 /* set matrix "i" values */ 2208 a->i[0] = 0; 2209 for ( i=1; i<= mbs; i++ ) { 2210 a->i[i] = a->i[i-1] + browlengths[i-1]; 2211 a->ilen[i-1] = browlengths[i-1]; 2212 } 2213 a->nz = 0; 2214 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 2215 2216 /* read in nonzero values */ 2217 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 2218 ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 2219 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 2220 2221 /* set "a" and "j" values into matrix */ 2222 nzcount = 0; jcount = 0; 2223 for ( i=0; i<mbs; i++ ) { 2224 nzcountb = nzcount; 2225 nmask = 0; 2226 for ( j=0; j<bs; j++ ) { 2227 kmax = rowlengths[i*bs+j]; 2228 for ( k=0; k<kmax; k++ ) { 2229 tmp = jj[nzcount++]/bs; 2230 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2231 } 2232 rowcount++; 2233 } 2234 /* sort the masked values */ 2235 PetscSortInt(nmask,masked); 2236 2237 /* set "j" values into matrix */ 2238 maskcount = 1; 2239 for ( j=0; j<nmask; j++ ) { 2240 a->j[jcount++] = masked[j]; 2241 mask[masked[j]] = maskcount++; 2242 } 2243 /* set "a" values into matrix */ 2244 ishift = bs2*a->i[i]; 2245 for ( j=0; j<bs; j++ ) { 2246 kmax = rowlengths[i*bs+j]; 2247 for ( k=0; k<kmax; k++ ) { 2248 tmp = jj[nzcountb]/bs ; 2249 block = mask[tmp] - 1; 2250 point = jj[nzcountb] - bs*tmp; 2251 idx = ishift + bs2*block + j + bs*point; 2252 a->a[idx] = aa[nzcountb++]; 2253 } 2254 } 2255 /* zero out the mask elements we set */ 2256 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2257 } 2258 if (jcount != a->nz) SETERRQ(1,0,"Error bad binary matrix"); 2259 2260 PetscFree(rowlengths); 2261 PetscFree(browlengths); 2262 PetscFree(aa); 2263 PetscFree(jj); 2264 PetscFree(mask); 2265 2266 B->assembled = PETSC_TRUE; 2267 2268 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 2269 if (flg) { 2270 Viewer tviewer; 2271 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2272 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO,0);CHKERRQ(ierr); 2273 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2274 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2275 } 2276 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 2277 if (flg) { 2278 Viewer tviewer; 2279 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2280 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO_LONG,0);CHKERRQ(ierr); 2281 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2282 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2283 } 2284 ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 2285 if (flg) { 2286 Viewer tviewer; 2287 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2288 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2289 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2290 } 2291 ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 2292 if (flg) { 2293 Viewer tviewer; 2294 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2295 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_MATLAB,"M");CHKERRQ(ierr); 2296 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2297 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2298 } 2299 ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 2300 if (flg) { 2301 Viewer tviewer; 2302 ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 2303 ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 2304 ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 2305 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2306 } 2307 return 0; 2308 } 2309 2310 2311 2312