1 #ifndef lint 2 static char vcid[] = "$Id: baij.c,v 1.89 1997/02/05 23:30:18 balay Exp balay $"; 3 #endif 4 5 /* 6 Defines the basic matrix operations for the BAIJ (compressed row) 7 matrix storage format. 8 */ 9 #include "src/mat/impls/baij/seq/baij.h" 10 #include "src/vec/vecimpl.h" 11 #include "src/inline/spops.h" 12 #include "petsc.h" 13 14 15 /* 16 Adds diagonal pointers to sparse matrix structure. 17 */ 18 19 #undef __FUNC__ 20 #define __FUNC__ "MatMarkDiag_SeqBAIJ" 21 int MatMarkDiag_SeqBAIJ(Mat A) 22 { 23 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 24 int i,j, *diag, m = a->mbs; 25 26 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 27 PLogObjectMemory(A,(m+1)*sizeof(int)); 28 for ( i=0; i<m; i++ ) { 29 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 30 if (a->j[j] == i) { 31 diag[i] = j; 32 break; 33 } 34 } 35 } 36 a->diag = diag; 37 return 0; 38 } 39 40 #include "draw.h" 41 #include "pinclude/pviewer.h" 42 #include "sys.h" 43 44 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 45 46 #undef __FUNC__ 47 #define __FUNC__ "MatGetRowIJ_SeqBAIJ" 48 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 49 PetscTruth *done) 50 { 51 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 52 int ierr, n = a->mbs,i; 53 54 *nn = n; 55 if (!ia) return 0; 56 if (symmetric) { 57 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 58 } else if (oshift == 1) { 59 /* temporarily add 1 to i and j indices */ 60 int nz = a->i[n] + 1; 61 for ( i=0; i<nz; i++ ) a->j[i]++; 62 for ( i=0; i<n+1; i++ ) a->i[i]++; 63 *ia = a->i; *ja = a->j; 64 } else { 65 *ia = a->i; *ja = a->j; 66 } 67 68 return 0; 69 } 70 71 #undef __FUNC__ 72 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ" 73 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 74 PetscTruth *done) 75 { 76 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 77 int i,n = a->mbs; 78 79 if (!ia) return 0; 80 if (symmetric) { 81 PetscFree(*ia); 82 PetscFree(*ja); 83 } else if (oshift == 1) { 84 int nz = a->i[n]; 85 for ( i=0; i<nz; i++ ) a->j[i]--; 86 for ( i=0; i<n+1; i++ ) a->i[i]--; 87 } 88 return 0; 89 } 90 91 92 #undef __FUNC__ 93 #define __FUNC__ "MatView_SeqBAIJ_Binary" 94 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 95 { 96 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 97 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 98 Scalar *aa; 99 100 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 101 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 102 col_lens[0] = MAT_COOKIE; 103 104 col_lens[1] = a->m; 105 col_lens[2] = a->n; 106 col_lens[3] = a->nz*bs2; 107 108 /* store lengths of each row and write (including header) to file */ 109 count = 0; 110 for ( i=0; i<a->mbs; i++ ) { 111 for ( j=0; j<bs; j++ ) { 112 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 113 } 114 } 115 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 116 PetscFree(col_lens); 117 118 /* store column indices (zero start index) */ 119 jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj); 120 count = 0; 121 for ( i=0; i<a->mbs; i++ ) { 122 for ( j=0; j<bs; j++ ) { 123 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 124 for ( l=0; l<bs; l++ ) { 125 jj[count++] = bs*a->j[k] + l; 126 } 127 } 128 } 129 } 130 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr); 131 PetscFree(jj); 132 133 /* store nonzero values */ 134 aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa); 135 count = 0; 136 for ( i=0; i<a->mbs; i++ ) { 137 for ( j=0; j<bs; j++ ) { 138 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 139 for ( l=0; l<bs; l++ ) { 140 aa[count++] = a->a[bs2*k + l*bs + j]; 141 } 142 } 143 } 144 } 145 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 146 PetscFree(aa); 147 return 0; 148 } 149 150 #undef __FUNC__ 151 #define __FUNC__ "MatView_SeqBAIJ_ASCII" 152 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 153 { 154 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 155 int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 156 FILE *fd; 157 char *outputname; 158 159 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 160 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 161 ierr = ViewerGetFormat(viewer,&format); 162 if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) { 163 fprintf(fd," block size is %d\n",bs); 164 } 165 else if (format == VIEWER_FORMAT_ASCII_MATLAB) { 166 SETERRQ(1,0,"Matlab format not supported"); 167 } 168 else if (format == VIEWER_FORMAT_ASCII_COMMON) { 169 for ( i=0; i<a->mbs; i++ ) { 170 for ( j=0; j<bs; j++ ) { 171 fprintf(fd,"row %d:",i*bs+j); 172 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 173 for ( l=0; l<bs; l++ ) { 174 #if defined(PETSC_COMPLEX) 175 if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 176 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 177 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 178 else if (real(a->a[bs2*k + l*bs + j]) != 0.0) 179 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 180 #else 181 if (a->a[bs2*k + l*bs + j] != 0.0) 182 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 183 #endif 184 } 185 } 186 fprintf(fd,"\n"); 187 } 188 } 189 } 190 else { 191 for ( i=0; i<a->mbs; i++ ) { 192 for ( j=0; j<bs; j++ ) { 193 fprintf(fd,"row %d:",i*bs+j); 194 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 195 for ( l=0; l<bs; l++ ) { 196 #if defined(PETSC_COMPLEX) 197 if (imag(a->a[bs2*k + l*bs + j]) != 0.0) { 198 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 199 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 200 } 201 else { 202 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 203 } 204 #else 205 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 206 #endif 207 } 208 } 209 fprintf(fd,"\n"); 210 } 211 } 212 } 213 fflush(fd); 214 return 0; 215 } 216 217 #undef __FUNC__ 218 #define __FUNC__ "MatView_SeqBAIJ_Draw" 219 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer) 220 { 221 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 222 int row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2; 223 double xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r; 224 Scalar *aa; 225 Draw draw; 226 DrawButton button; 227 PetscTruth isnull; 228 229 ViewerDrawGetDraw(viewer,&draw); 230 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 231 232 xr = a->n; yr = a->m; h = yr/10.0; w = xr/10.0; 233 xr += w; yr += h; xl = -w; yl = -h; 234 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 235 /* loop over matrix elements drawing boxes */ 236 color = DRAW_BLUE; 237 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 238 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 239 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 240 x_l = a->j[j]*bs; x_r = x_l + 1.0; 241 aa = a->a + j*bs2; 242 for ( k=0; k<bs; k++ ) { 243 for ( l=0; l<bs; l++ ) { 244 if (PetscReal(*aa++) >= 0.) continue; 245 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 246 } 247 } 248 } 249 } 250 color = DRAW_CYAN; 251 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 252 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 253 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 254 x_l = a->j[j]*bs; x_r = x_l + 1.0; 255 aa = a->a + j*bs2; 256 for ( k=0; k<bs; k++ ) { 257 for ( l=0; l<bs; l++ ) { 258 if (PetscReal(*aa++) != 0.) continue; 259 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 260 } 261 } 262 } 263 } 264 265 color = DRAW_RED; 266 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 267 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 268 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 269 x_l = a->j[j]*bs; x_r = x_l + 1.0; 270 aa = a->a + j*bs2; 271 for ( k=0; k<bs; k++ ) { 272 for ( l=0; l<bs; l++ ) { 273 if (PetscReal(*aa++) <= 0.) continue; 274 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 275 } 276 } 277 } 278 } 279 280 DrawFlush(draw); 281 DrawGetPause(draw,&pause); 282 if (pause >= 0) { PetscSleep(pause); return 0;} 283 284 /* allow the matrix to zoom or shrink */ 285 ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 286 while (button != BUTTON_RIGHT) { 287 DrawClear(draw); 288 if (button == BUTTON_LEFT) scale = .5; 289 else if (button == BUTTON_CENTER) scale = 2.; 290 xl = scale*(xl + w - xc) + xc - w*scale; 291 xr = scale*(xr - w - xc) + xc + w*scale; 292 yl = scale*(yl + h - yc) + yc - h*scale; 293 yr = scale*(yr - h - yc) + yc + h*scale; 294 w *= scale; h *= scale; 295 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 296 color = DRAW_BLUE; 297 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 298 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 299 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 300 x_l = a->j[j]*bs; x_r = x_l + 1.0; 301 aa = a->a + j*bs2; 302 for ( k=0; k<bs; k++ ) { 303 for ( l=0; l<bs; l++ ) { 304 if (PetscReal(*aa++) >= 0.) continue; 305 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 306 } 307 } 308 } 309 } 310 color = DRAW_CYAN; 311 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 312 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 313 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 314 x_l = a->j[j]*bs; x_r = x_l + 1.0; 315 aa = a->a + j*bs2; 316 for ( k=0; k<bs; k++ ) { 317 for ( l=0; l<bs; l++ ) { 318 if (PetscReal(*aa++) != 0.) continue; 319 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 320 } 321 } 322 } 323 } 324 325 color = DRAW_RED; 326 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 327 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 328 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 329 x_l = a->j[j]*bs; x_r = x_l + 1.0; 330 aa = a->a + j*bs2; 331 for ( k=0; k<bs; k++ ) { 332 for ( l=0; l<bs; l++ ) { 333 if (PetscReal(*aa++) <= 0.) continue; 334 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 335 } 336 } 337 } 338 } 339 ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 340 } 341 return 0; 342 } 343 344 #undef __FUNC__ 345 #define __FUNC__ "MatView_SeqBAIJ" 346 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 347 { 348 Mat A = (Mat) obj; 349 ViewerType vtype; 350 int ierr; 351 352 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 353 if (vtype == MATLAB_VIEWER) { 354 SETERRQ(1,0,"Matlab viewer not supported"); 355 } 356 else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 357 return MatView_SeqBAIJ_ASCII(A,viewer); 358 } 359 else if (vtype == BINARY_FILE_VIEWER) { 360 return MatView_SeqBAIJ_Binary(A,viewer); 361 } 362 else if (vtype == DRAW_VIEWER) { 363 return MatView_SeqBAIJ_Draw(A,viewer); 364 } 365 return 0; 366 } 367 368 #define CHUNKSIZE 10 369 370 #undef __FUNC__ 371 #define __FUNC__ "MatSetValues_SeqBAIJ" 372 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 373 { 374 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 375 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 376 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 377 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 378 int ridx,cidx,bs2=a->bs2; 379 Scalar *ap,value,*aa=a->a,*bap; 380 381 for ( k=0; k<m; k++ ) { /* loop over added rows */ 382 row = im[k]; brow = row/bs; 383 #if defined(PETSC_BOPT_g) 384 if (row < 0) SETERRQ(1,0,"Negative row"); 385 if (row >= a->m) SETERRQ(1,0,"Row too large"); 386 #endif 387 rp = aj + ai[brow]; 388 ap = aa + bs2*ai[brow]; 389 rmax = imax[brow]; 390 nrow = ailen[brow]; 391 low = 0; 392 for ( l=0; l<n; l++ ) { /* loop over added columns */ 393 #if defined(PETSC_BOPT_g) 394 if (in[l] < 0) SETERRQ(1,0,"Negative column"); 395 if (in[l] >= a->n) SETERRQ(1,0,"Column too large"); 396 #endif 397 col = in[l]; bcol = col/bs; 398 ridx = row % bs; cidx = col % bs; 399 if (roworiented) { 400 value = *v++; 401 } 402 else { 403 value = v[k + l*m]; 404 } 405 if (!sorted) low = 0; high = nrow; 406 while (high-low > 7) { 407 t = (low+high)/2; 408 if (rp[t] > bcol) high = t; 409 else low = t; 410 } 411 for ( i=low; i<high; i++ ) { 412 if (rp[i] > bcol) break; 413 if (rp[i] == bcol) { 414 bap = ap + bs2*i + bs*cidx + ridx; 415 if (is == ADD_VALUES) *bap += value; 416 else *bap = value; 417 goto noinsert; 418 } 419 } 420 if (nonew) goto noinsert; 421 if (nrow >= rmax) { 422 /* there is no extra room in row, therefore enlarge */ 423 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 424 Scalar *new_a; 425 426 /* malloc new storage space */ 427 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 428 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 429 new_j = (int *) (new_a + bs2*new_nz); 430 new_i = new_j + new_nz; 431 432 /* copy over old data into new slots */ 433 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 434 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 435 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); 436 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 437 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, 438 len*sizeof(int)); 439 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); 440 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 441 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), 442 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); 443 /* free up old matrix storage */ 444 PetscFree(a->a); 445 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 446 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 447 a->singlemalloc = 1; 448 449 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 450 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 451 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 452 a->maxnz += bs2*CHUNKSIZE; 453 a->reallocs++; 454 a->nz++; 455 } 456 N = nrow++ - 1; 457 /* shift up all the later entries in this row */ 458 for ( ii=N; ii>=i; ii-- ) { 459 rp[ii+1] = rp[ii]; 460 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 461 } 462 if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 463 rp[i] = bcol; 464 ap[bs2*i + bs*cidx + ridx] = value; 465 noinsert:; 466 low = i; 467 } 468 ailen[brow] = nrow; 469 } 470 return 0; 471 } 472 473 #undef __FUNC__ 474 #define __FUNC__ "MatSetValues_SeqBAIJ" 475 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 476 { 477 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 478 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 479 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 480 int roworiented=a->roworiented; 481 int *aj=a->j,nonew=a->nonew; 482 int bs2=a->bs2,bs=a->bs,stepval; 483 Scalar *ap,*value=v,*aa=a->a,*bap; 484 485 if (roworiented) { 486 stepval = (n-1)*bs; 487 } else { 488 stepval = (m-1)*bs; 489 } 490 for ( k=0; k<m; k++ ) { /* loop over added rows */ 491 row = im[k]; 492 #if defined(PETSC_BOPT_g) 493 if (row < 0) SETERRQ(1,0,"Negative row"); 494 if (row >= a->mbs) SETERRQ(1,0,"Row too large"); 495 #endif 496 rp = aj + ai[row]; 497 ap = aa + bs2*ai[row]; 498 rmax = imax[row]; 499 nrow = ailen[row]; 500 low = 0; 501 for ( l=0; l<n; l++ ) { /* loop over added columns */ 502 #if defined(PETSC_BOPT_g) 503 if (in[l] < 0) SETERRQ(1,0,"Negative column"); 504 if (in[l] >= a->nbs) SETERRQ(1,0,"Column too large"); 505 #endif 506 col = in[l]; 507 if (roworiented) { 508 value = v + k*(stepval+bs)*bs + l*bs; 509 } else { 510 value = v + l*(stepval+bs)*bs + k*bs; 511 } 512 if (!sorted) low = 0; high = nrow; 513 while (high-low > 7) { 514 t = (low+high)/2; 515 if (rp[t] > col) high = t; 516 else low = t; 517 } 518 for ( i=low; i<high; i++ ) { 519 if (rp[i] > col) break; 520 if (rp[i] == col) { 521 bap = ap + bs2*i; 522 if (roworiented) { 523 if (is == ADD_VALUES) { 524 for ( ii=0; ii<bs; ii++,value+=stepval ) 525 for (jj=ii; jj<bs2; jj+=bs ) 526 bap[jj] += *value++; 527 } else { 528 for ( ii=0; ii<bs; ii++,value+=stepval ) 529 for (jj=ii; jj<bs2; jj+=bs ) 530 bap[jj] = *value++; 531 } 532 } else { 533 if (is == ADD_VALUES) { 534 for ( ii=0; ii<bs; ii++,value+=stepval ) 535 for (jj=0; jj<bs; jj++ ) 536 *bap++ += *value++; 537 } else { 538 for ( ii=0; ii<bs; ii++,value+=stepval ) 539 for (jj=0; jj<bs; jj++ ) 540 *bap++ = *value++; 541 } 542 } 543 goto noinsert; 544 } 545 } 546 if (nonew) goto noinsert; 547 if (nrow >= rmax) { 548 /* there is no extra room in row, therefore enlarge */ 549 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 550 Scalar *new_a; 551 552 /* malloc new storage space */ 553 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 554 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 555 new_j = (int *) (new_a + bs2*new_nz); 556 new_i = new_j + new_nz; 557 558 /* copy over old data into new slots */ 559 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} 560 for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 561 PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int)); 562 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 563 PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow, 564 len*sizeof(int)); 565 PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(Scalar)); 566 PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 567 PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE), 568 aa+bs2*(ai[row]+nrow),bs2*len*sizeof(Scalar)); 569 /* free up old matrix storage */ 570 PetscFree(a->a); 571 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 572 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 573 a->singlemalloc = 1; 574 575 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 576 rmax = imax[row] = imax[row] + CHUNKSIZE; 577 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 578 a->maxnz += bs2*CHUNKSIZE; 579 a->reallocs++; 580 a->nz++; 581 } 582 N = nrow++ - 1; 583 /* shift up all the later entries in this row */ 584 for ( ii=N; ii>=i; ii-- ) { 585 rp[ii+1] = rp[ii]; 586 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 587 } 588 if (N >= i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 589 rp[i] = col; 590 bap = ap + bs2*i; 591 if (roworiented) { 592 for ( ii=0; ii<bs; ii++,value+=stepval) 593 for (jj=ii; jj<bs2; jj+=bs ) 594 bap[jj] = *value++; 595 } else { 596 for ( ii=0; ii<bs; ii++,value+=stepval ) 597 for (jj=0; jj<bs; jj++ ) 598 *bap++ = *value++; 599 } 600 noinsert:; 601 low = i; 602 } 603 ailen[row] = nrow; 604 } 605 return 0; 606 } 607 608 #undef __FUNC__ 609 #define __FUNC__ "MatGetSize_SeqBAIJ" 610 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 611 { 612 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 613 *m = a->m; *n = a->n; 614 return 0; 615 } 616 617 #undef __FUNC__ 618 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ" 619 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 620 { 621 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 622 *m = 0; *n = a->m; 623 return 0; 624 } 625 626 #undef __FUNC__ 627 #define __FUNC__ "MatGetRow_SeqBAIJ" 628 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 629 { 630 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 631 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 632 Scalar *aa,*v_i,*aa_i; 633 634 bs = a->bs; 635 ai = a->i; 636 aj = a->j; 637 aa = a->a; 638 bs2 = a->bs2; 639 640 if (row < 0 || row >= a->m) SETERRQ(1,0,"Row out of range"); 641 642 bn = row/bs; /* Block number */ 643 bp = row % bs; /* Block Position */ 644 M = ai[bn+1] - ai[bn]; 645 *nz = bs*M; 646 647 if (v) { 648 *v = 0; 649 if (*nz) { 650 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 651 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 652 v_i = *v + i*bs; 653 aa_i = aa + bs2*(ai[bn] + i); 654 for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 655 } 656 } 657 } 658 659 if (idx) { 660 *idx = 0; 661 if (*nz) { 662 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 663 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 664 idx_i = *idx + i*bs; 665 itmp = bs*aj[ai[bn] + i]; 666 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 667 } 668 } 669 } 670 return 0; 671 } 672 673 #undef __FUNC__ 674 #define __FUNC__ "MatRestoreRow_SeqBAIJ" 675 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 676 { 677 if (idx) {if (*idx) PetscFree(*idx);} 678 if (v) {if (*v) PetscFree(*v);} 679 return 0; 680 } 681 682 #undef __FUNC__ 683 #define __FUNC__ "MatTranspose_SeqBAIJ" 684 static int MatTranspose_SeqBAIJ(Mat A,Mat *B) 685 { 686 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 687 Mat C; 688 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 689 int *rows,*cols,bs2=a->bs2; 690 Scalar *array=a->a; 691 692 if (B==PETSC_NULL && mbs!=nbs) 693 SETERRQ(1,0,"Square matrix only for in-place"); 694 col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 695 PetscMemzero(col,(1+nbs)*sizeof(int)); 696 697 for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 698 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 699 PetscFree(col); 700 rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 701 cols = rows + bs; 702 for ( i=0; i<mbs; i++ ) { 703 cols[0] = i*bs; 704 for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 705 len = ai[i+1] - ai[i]; 706 for ( j=0; j<len; j++ ) { 707 rows[0] = (*aj++)*bs; 708 for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 709 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 710 array += bs2; 711 } 712 } 713 PetscFree(rows); 714 715 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 716 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 717 718 if (B != PETSC_NULL) { 719 *B = C; 720 } else { 721 /* This isn't really an in-place transpose */ 722 PetscFree(a->a); 723 if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 724 if (a->diag) PetscFree(a->diag); 725 if (a->ilen) PetscFree(a->ilen); 726 if (a->imax) PetscFree(a->imax); 727 if (a->solve_work) PetscFree(a->solve_work); 728 PetscFree(a); 729 PetscMemcpy(A,C,sizeof(struct _Mat)); 730 PetscHeaderDestroy(C); 731 } 732 return 0; 733 } 734 735 736 #undef __FUNC__ 737 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ" 738 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 739 { 740 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 741 int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 742 int m = a->m,*ip, N, *ailen = a->ilen; 743 int mbs = a->mbs, bs2 = a->bs2,rmax; 744 Scalar *aa = a->a, *ap; 745 746 if (mode == MAT_FLUSH_ASSEMBLY) return 0; 747 748 rmax = ailen[0]; 749 for ( i=1; i<mbs; i++ ) { 750 /* move each row back by the amount of empty slots (fshift) before it*/ 751 fshift += imax[i-1] - ailen[i-1]; 752 rmax = PetscMax(rmax,ailen[i]); 753 if (fshift) { 754 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 755 N = ailen[i]; 756 for ( j=0; j<N; j++ ) { 757 ip[j-fshift] = ip[j]; 758 PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 759 } 760 } 761 ai[i] = ai[i-1] + ailen[i-1]; 762 } 763 if (mbs) { 764 fshift += imax[mbs-1] - ailen[mbs-1]; 765 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 766 } 767 /* reset ilen and imax for each row */ 768 for ( i=0; i<mbs; i++ ) { 769 ailen[i] = imax[i] = ai[i+1] - ai[i]; 770 } 771 a->nz = ai[mbs]; 772 773 /* diagonals may have moved, so kill the diagonal pointers */ 774 if (fshift && a->diag) { 775 PetscFree(a->diag); 776 PLogObjectMemory(A,-(m+1)*sizeof(int)); 777 a->diag = 0; 778 } 779 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 780 m,a->n,a->bs,fshift*bs2,a->nz*bs2); 781 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 782 a->reallocs); 783 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 784 a->reallocs = 0; 785 A->info.nz_unneeded = (double)fshift*bs2; 786 787 return 0; 788 } 789 790 #undef __FUNC__ 791 #define __FUNC__ "MatZeroEntries_SeqBAIJ" 792 static int MatZeroEntries_SeqBAIJ(Mat A) 793 { 794 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 795 PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar)); 796 return 0; 797 } 798 799 #undef __FUNC__ 800 #define __FUNC__ "MatDestroy_SeqBAIJ" 801 int MatDestroy_SeqBAIJ(PetscObject obj) 802 { 803 Mat A = (Mat) obj; 804 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 805 int ierr; 806 807 #if defined(PETSC_LOG) 808 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 809 #endif 810 PetscFree(a->a); 811 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 812 if (a->diag) PetscFree(a->diag); 813 if (a->ilen) PetscFree(a->ilen); 814 if (a->imax) PetscFree(a->imax); 815 if (a->solve_work) PetscFree(a->solve_work); 816 if (a->mult_work) PetscFree(a->mult_work); 817 PetscFree(a); 818 if (A->mapping) { 819 ierr = ISLocalToGlobalMappingDestroy(A->mapping); CHKERRQ(ierr); 820 } 821 PLogObjectDestroy(A); 822 PetscHeaderDestroy(A); 823 return 0; 824 } 825 826 #undef __FUNC__ 827 #define __FUNC__ "MatSetOption_SeqBAIJ" 828 static int MatSetOption_SeqBAIJ(Mat A,MatOption op) 829 { 830 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 831 if (op == MAT_ROW_ORIENTED) a->roworiented = 1; 832 else if (op == MAT_COLUMN_ORIENTED) a->roworiented = 0; 833 else if (op == MAT_COLUMNS_SORTED) a->sorted = 1; 834 else if (op == MAT_COLUMNS_UNSORTED) a->sorted = 0; 835 else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 836 else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 837 else if (op == MAT_ROWS_SORTED || 838 op == MAT_ROWS_UNSORTED || 839 op == MAT_SYMMETRIC || 840 op == MAT_STRUCTURALLY_SYMMETRIC || 841 op == MAT_YES_NEW_DIAGONALS || 842 op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) 843 PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 844 else if (op == MAT_NO_NEW_DIAGONALS) 845 {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");} 846 else 847 {SETERRQ(PETSC_ERR_SUP,0,"unknown option");} 848 return 0; 849 } 850 851 852 /* -------------------------------------------------------*/ 853 /* Should check that shapes of vectors and matrices match */ 854 /* -------------------------------------------------------*/ 855 #include "pinclude/plapack.h" 856 857 #undef __FUNC__ 858 #define __FUNC__ "MatMult_SeqBAIJ_1" 859 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz) 860 { 861 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 862 register Scalar *x,*z,*v,sum; 863 int mbs=a->mbs,i,*idx,*ii,n; 864 865 VecGetArray_Fast(xx,x); 866 VecGetArray_Fast(zz,z); 867 868 idx = a->j; 869 v = a->a; 870 ii = a->i; 871 872 for ( i=0; i<mbs; i++ ) { 873 n = ii[1] - ii[0]; ii++; 874 sum = 0.0; 875 while (n--) sum += *v++ * x[*idx++]; 876 z[i] = sum; 877 } 878 VecRestoreArray_Fast(xx,x); 879 VecRestoreArray_Fast(zz,z); 880 PLogFlops(2*a->nz - a->m); 881 return 0; 882 } 883 884 #undef __FUNC__ 885 #define __FUNC__ "MatMult_SeqBAIJ_2" 886 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz) 887 { 888 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 889 register Scalar *x,*z,*v,*xb,sum1,sum2; 890 register Scalar x1,x2; 891 int mbs=a->mbs,i,*idx,*ii; 892 int j,n; 893 894 VecGetArray_Fast(xx,x); 895 VecGetArray_Fast(zz,z); 896 897 idx = a->j; 898 v = a->a; 899 ii = a->i; 900 901 for ( i=0; i<mbs; i++ ) { 902 n = ii[1] - ii[0]; ii++; 903 sum1 = 0.0; sum2 = 0.0; 904 for ( j=0; j<n; j++ ) { 905 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 906 sum1 += v[0]*x1 + v[2]*x2; 907 sum2 += v[1]*x1 + v[3]*x2; 908 v += 4; 909 } 910 z[0] = sum1; z[1] = sum2; 911 z += 2; 912 } 913 VecRestoreArray_Fast(xx,x); 914 VecRestoreArray_Fast(zz,z); 915 PLogFlops(4*a->nz - a->m); 916 return 0; 917 } 918 919 #undef __FUNC__ 920 #define __FUNC__ "MatMult_SeqBAIJ_3" 921 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz) 922 { 923 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 924 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 925 int mbs=a->mbs,i,*idx,*ii,j,n; 926 927 VecGetArray_Fast(xx,x); 928 VecGetArray_Fast(zz,z); 929 930 idx = a->j; 931 v = a->a; 932 ii = a->i; 933 934 for ( i=0; i<mbs; i++ ) { 935 n = ii[1] - ii[0]; ii++; 936 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 937 for ( j=0; j<n; j++ ) { 938 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 939 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 940 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 941 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 942 v += 9; 943 } 944 z[0] = sum1; z[1] = sum2; z[2] = sum3; 945 z += 3; 946 } 947 VecRestoreArray_Fast(xx,x); 948 VecRestoreArray_Fast(zz,z); 949 PLogFlops(18*a->nz - a->m); 950 return 0; 951 } 952 953 #undef __FUNC__ 954 #define __FUNC__ "MatMult_SeqBAIJ_4" 955 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz) 956 { 957 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 958 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4; 959 register Scalar x1,x2,x3,x4; 960 int mbs=a->mbs,i,*idx,*ii; 961 int j,n; 962 963 VecGetArray_Fast(xx,x); 964 VecGetArray_Fast(zz,z); 965 966 idx = a->j; 967 v = a->a; 968 ii = a->i; 969 970 for ( i=0; i<mbs; i++ ) { 971 n = ii[1] - ii[0]; ii++; 972 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 973 for ( j=0; j<n; j++ ) { 974 xb = x + 4*(*idx++); 975 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 976 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 977 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 978 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 979 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 980 v += 16; 981 } 982 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 983 z += 4; 984 } 985 VecRestoreArray_Fast(xx,x); 986 VecRestoreArray_Fast(zz,z); 987 PLogFlops(32*a->nz - a->m); 988 return 0; 989 } 990 991 #undef __FUNC__ 992 #define __FUNC__ "MatMult_SeqBAIJ_5" 993 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz) 994 { 995 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 996 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 997 register Scalar x1,x2,x3,x4,x5; 998 int mbs=a->mbs,i,*idx,*ii,j,n; 999 1000 VecGetArray_Fast(xx,x); 1001 VecGetArray_Fast(zz,z); 1002 1003 idx = a->j; 1004 v = a->a; 1005 ii = a->i; 1006 1007 for ( i=0; i<mbs; i++ ) { 1008 n = ii[1] - ii[0]; ii++; 1009 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 1010 for ( j=0; j<n; j++ ) { 1011 xb = x + 5*(*idx++); 1012 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 1013 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1014 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1015 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1016 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1017 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 1018 v += 25; 1019 } 1020 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 1021 z += 5; 1022 } 1023 VecRestoreArray_Fast(xx,x); 1024 VecRestoreArray_Fast(zz,z); 1025 PLogFlops(50*a->nz - a->m); 1026 return 0; 1027 } 1028 1029 #undef __FUNC__ 1030 #define __FUNC__ "MatMult_SeqBAIJ_7" 1031 static int MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz) 1032 { 1033 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1034 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 1035 register Scalar x1,x2,x3,x4,x5,x6,x7; 1036 int mbs=a->mbs,i,*idx,*ii,j,n; 1037 1038 VecGetArray_Fast(xx,x); 1039 VecGetArray_Fast(zz,z); 1040 1041 idx = a->j; 1042 v = a->a; 1043 ii = a->i; 1044 1045 for ( i=0; i<mbs; i++ ) { 1046 n = ii[1] - ii[0]; ii++; 1047 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0; 1048 for ( j=0; j<n; j++ ) { 1049 xb = x + 7*(*idx++); 1050 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 1051 sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 1052 sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 1053 sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 1054 sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 1055 sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 1056 sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 1057 sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 1058 v += 49; 1059 } 1060 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 1061 z += 7; 1062 } 1063 1064 VecRestoreArray_Fast(xx,x); 1065 VecRestoreArray_Fast(zz,z); 1066 PLogFlops(98*a->nz - a->m); 1067 return 0; 1068 } 1069 1070 #undef __FUNC__ 1071 #define __FUNC__ "MatMult_SeqBAIJ_N" 1072 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz) 1073 { 1074 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1075 register Scalar *x,*z,*v,*xb; 1076 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2; 1077 int _One = 1,ncols,k; 1078 Scalar _DOne = 1.0,*work,*workt,_DZero = 0.0; 1079 1080 VecGetArray_Fast(xx,x); 1081 VecGetArray_Fast(zz,z); 1082 1083 idx = a->j; 1084 v = a->a; 1085 ii = a->i; 1086 1087 1088 if (!a->mult_work) { 1089 k = PetscMax(a->m,a->n); 1090 a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work); 1091 } 1092 work = a->mult_work; 1093 for ( i=0; i<mbs; i++ ) { 1094 n = ii[1] - ii[0]; ii++; 1095 ncols = n*bs; 1096 workt = work; 1097 for ( j=0; j<n; j++ ) { 1098 xb = x + bs*(*idx++); 1099 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1100 workt += bs; 1101 } 1102 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); 1103 v += n*bs2; 1104 z += bs; 1105 } 1106 VecRestoreArray_Fast(xx,x); 1107 VecRestoreArray_Fast(zz,z); 1108 PLogFlops(2*a->nz*bs2 - a->m); 1109 return 0; 1110 } 1111 1112 #undef __FUNC__ 1113 #define __FUNC__ "MatMultAdd_SeqBAIJ_1" 1114 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 1115 { 1116 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1117 register Scalar *x,*y,*z,*v,sum; 1118 int mbs=a->mbs,i,*idx,*ii,n; 1119 1120 VecGetArray_Fast(xx,x); 1121 VecGetArray_Fast(yy,y); 1122 VecGetArray_Fast(zz,z); 1123 1124 idx = a->j; 1125 v = a->a; 1126 ii = a->i; 1127 1128 for ( i=0; i<mbs; i++ ) { 1129 n = ii[1] - ii[0]; ii++; 1130 sum = y[i]; 1131 while (n--) sum += *v++ * x[*idx++]; 1132 z[i] = sum; 1133 } 1134 VecRestoreArray_Fast(xx,x); 1135 VecRestoreArray_Fast(yy,y); 1136 VecRestoreArray_Fast(zz,z); 1137 PLogFlops(2*a->nz); 1138 return 0; 1139 } 1140 1141 #undef __FUNC__ 1142 #define __FUNC__ "MatMultAdd_SeqBAIJ_2" 1143 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 1144 { 1145 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1146 register Scalar *x,*y,*z,*v,*xb,sum1,sum2; 1147 register Scalar x1,x2; 1148 int mbs=a->mbs,i,*idx,*ii; 1149 int j,n; 1150 1151 VecGetArray_Fast(xx,x); 1152 VecGetArray_Fast(yy,y); 1153 VecGetArray_Fast(zz,z); 1154 1155 idx = a->j; 1156 v = a->a; 1157 ii = a->i; 1158 1159 for ( i=0; i<mbs; i++ ) { 1160 n = ii[1] - ii[0]; ii++; 1161 sum1 = y[0]; sum2 = y[1]; 1162 for ( j=0; j<n; j++ ) { 1163 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 1164 sum1 += v[0]*x1 + v[2]*x2; 1165 sum2 += v[1]*x1 + v[3]*x2; 1166 v += 4; 1167 } 1168 z[0] = sum1; z[1] = sum2; 1169 z += 2; y += 2; 1170 } 1171 VecRestoreArray_Fast(xx,x); 1172 VecRestoreArray_Fast(yy,y); 1173 VecRestoreArray_Fast(zz,z); 1174 PLogFlops(4*a->nz); 1175 return 0; 1176 } 1177 1178 #undef __FUNC__ 1179 #define __FUNC__ "MatMultAdd_SeqBAIJ_3" 1180 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 1181 { 1182 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1183 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 1184 int mbs=a->mbs,i,*idx,*ii,j,n; 1185 1186 VecGetArray_Fast(xx,x); 1187 VecGetArray_Fast(yy,y); 1188 VecGetArray_Fast(zz,z); 1189 1190 idx = a->j; 1191 v = a->a; 1192 ii = a->i; 1193 1194 for ( i=0; i<mbs; i++ ) { 1195 n = ii[1] - ii[0]; ii++; 1196 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; 1197 for ( j=0; j<n; j++ ) { 1198 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1199 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 1200 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 1201 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 1202 v += 9; 1203 } 1204 z[0] = sum1; z[1] = sum2; z[2] = sum3; 1205 z += 3; y += 3; 1206 } 1207 VecRestoreArray_Fast(xx,x); 1208 VecRestoreArray_Fast(yy,y); 1209 VecRestoreArray_Fast(zz,z); 1210 PLogFlops(18*a->nz); 1211 return 0; 1212 } 1213 1214 #undef __FUNC__ 1215 #define __FUNC__ "MatMultAdd_SeqBAIJ_4" 1216 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 1217 { 1218 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1219 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4; 1220 register Scalar x1,x2,x3,x4; 1221 int mbs=a->mbs,i,*idx,*ii; 1222 int j,n; 1223 1224 VecGetArray_Fast(xx,x); 1225 VecGetArray_Fast(yy,y); 1226 VecGetArray_Fast(zz,z); 1227 1228 idx = a->j; 1229 v = a->a; 1230 ii = a->i; 1231 1232 for ( i=0; i<mbs; i++ ) { 1233 n = ii[1] - ii[0]; ii++; 1234 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; 1235 for ( j=0; j<n; j++ ) { 1236 xb = x + 4*(*idx++); 1237 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1238 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 1239 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 1240 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 1241 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 1242 v += 16; 1243 } 1244 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 1245 z += 4; y += 4; 1246 } 1247 VecRestoreArray_Fast(xx,x); 1248 VecRestoreArray_Fast(yy,y); 1249 VecRestoreArray_Fast(zz,z); 1250 PLogFlops(32*a->nz); 1251 return 0; 1252 } 1253 1254 #undef __FUNC__ 1255 #define __FUNC__ "MatMultAdd_SeqBAIJ_5" 1256 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 1257 { 1258 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1259 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 1260 register Scalar x1,x2,x3,x4,x5; 1261 int mbs=a->mbs,i,*idx,*ii,j,n; 1262 1263 VecGetArray_Fast(xx,x); 1264 VecGetArray_Fast(yy,y); 1265 VecGetArray_Fast(zz,z); 1266 1267 idx = a->j; 1268 v = a->a; 1269 ii = a->i; 1270 1271 for ( i=0; i<mbs; i++ ) { 1272 n = ii[1] - ii[0]; ii++; 1273 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; 1274 for ( j=0; j<n; j++ ) { 1275 xb = x + 5*(*idx++); 1276 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 1277 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1278 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1279 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1280 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1281 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 1282 v += 25; 1283 } 1284 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 1285 z += 5; y += 5; 1286 } 1287 VecRestoreArray_Fast(xx,x); 1288 VecRestoreArray_Fast(yy,y); 1289 VecRestoreArray_Fast(zz,z); 1290 PLogFlops(50*a->nz); 1291 return 0; 1292 } 1293 1294 #undef __FUNC__ 1295 #define __FUNC__ "MatMultAdd_SeqBAIJ_7" 1296 static int MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1297 { 1298 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1299 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 1300 register Scalar x1,x2,x3,x4,x5,x6,x7; 1301 int mbs=a->mbs,i,*idx,*ii,j,n; 1302 1303 VecGetArray_Fast(xx,x); 1304 VecGetArray_Fast(yy,y); 1305 VecGetArray_Fast(zz,z); 1306 1307 idx = a->j; 1308 v = a->a; 1309 ii = a->i; 1310 1311 for ( i=0; i<mbs; i++ ) { 1312 n = ii[1] - ii[0]; ii++; 1313 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6]; 1314 for ( j=0; j<n; j++ ) { 1315 xb = x + 7*(*idx++); 1316 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 1317 sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 1318 sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 1319 sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 1320 sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 1321 sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 1322 sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 1323 sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 1324 v += 49; 1325 } 1326 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 1327 z += 7; y += 7; 1328 } 1329 VecRestoreArray_Fast(xx,x); 1330 VecRestoreArray_Fast(yy,y); 1331 VecRestoreArray_Fast(zz,z); 1332 PLogFlops(98*a->nz); 1333 return 0; 1334 } 1335 1336 1337 #undef __FUNC__ 1338 #define __FUNC__ "MatMultAdd_SeqBAIJ_N" 1339 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 1340 { 1341 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1342 register Scalar *x,*z,*v,*xb; 1343 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr; 1344 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1345 1346 if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1347 1348 VecGetArray_Fast(xx,x); 1349 VecGetArray_Fast(zz,z); 1350 1351 idx = a->j; 1352 v = a->a; 1353 ii = a->i; 1354 1355 1356 if (!a->mult_work) { 1357 k = PetscMax(a->m,a->n); 1358 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work); 1359 } 1360 work = a->mult_work; 1361 for ( i=0; i<mbs; i++ ) { 1362 n = ii[1] - ii[0]; ii++; 1363 ncols = n*bs; 1364 workt = work; 1365 for ( j=0; j<n; j++ ) { 1366 xb = x + bs*(*idx++); 1367 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1368 workt += bs; 1369 } 1370 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); 1371 v += n*bs2; 1372 z += bs; 1373 } 1374 VecRestoreArray_Fast(xx,x); 1375 VecRestoreArray_Fast(zz,z); 1376 PLogFlops(2*a->nz*bs2 ); 1377 return 0; 1378 } 1379 1380 #undef __FUNC__ 1381 #define __FUNC__ "MatMultTrans_SeqBAIJ" 1382 static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz) 1383 { 1384 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1385 Scalar *xg,*zg,*zb; 1386 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1387 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1388 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1389 1390 1391 VecGetArray_Fast(xx,xg); x = xg; 1392 VecGetArray_Fast(zz,zg); z = zg; 1393 PetscMemzero(z,N*sizeof(Scalar)); 1394 1395 idx = a->j; 1396 v = a->a; 1397 ii = a->i; 1398 1399 switch (bs) { 1400 case 1: 1401 for ( i=0; i<mbs; i++ ) { 1402 n = ii[1] - ii[0]; ii++; 1403 xb = x + i; x1 = xb[0]; 1404 ib = idx + ai[i]; 1405 for ( j=0; j<n; j++ ) { 1406 rval = ib[j]; 1407 z[rval] += *v++ * x1; 1408 } 1409 } 1410 break; 1411 case 2: 1412 for ( i=0; i<mbs; i++ ) { 1413 n = ii[1] - ii[0]; ii++; 1414 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1415 ib = idx + ai[i]; 1416 for ( j=0; j<n; j++ ) { 1417 rval = ib[j]*2; 1418 z[rval++] += v[0]*x1 + v[1]*x2; 1419 z[rval++] += v[2]*x1 + v[3]*x2; 1420 v += 4; 1421 } 1422 } 1423 break; 1424 case 3: 1425 for ( i=0; i<mbs; i++ ) { 1426 n = ii[1] - ii[0]; ii++; 1427 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1428 ib = idx + ai[i]; 1429 for ( j=0; j<n; j++ ) { 1430 rval = ib[j]*3; 1431 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1432 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1433 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1434 v += 9; 1435 } 1436 } 1437 break; 1438 case 4: 1439 for ( i=0; i<mbs; i++ ) { 1440 n = ii[1] - ii[0]; ii++; 1441 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1442 ib = idx + ai[i]; 1443 for ( j=0; j<n; j++ ) { 1444 rval = ib[j]*4; 1445 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1446 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1447 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1448 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1449 v += 16; 1450 } 1451 } 1452 break; 1453 case 5: 1454 for ( i=0; i<mbs; i++ ) { 1455 n = ii[1] - ii[0]; ii++; 1456 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1457 x4 = xb[3]; x5 = xb[4]; 1458 ib = idx + ai[i]; 1459 for ( j=0; j<n; j++ ) { 1460 rval = ib[j]*5; 1461 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1462 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1463 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1464 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1465 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1466 v += 25; 1467 } 1468 } 1469 break; 1470 /* block sizes larger then 5 by 5 are handled by BLAS */ 1471 default: { 1472 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1473 if (!a->mult_work) { 1474 k = PetscMax(a->m,a->n); 1475 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1476 CHKPTRQ(a->mult_work); 1477 } 1478 work = a->mult_work; 1479 for ( i=0; i<mbs; i++ ) { 1480 n = ii[1] - ii[0]; ii++; 1481 ncols = n*bs; 1482 PetscMemzero(work,ncols*sizeof(Scalar)); 1483 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1484 v += n*bs2; 1485 x += bs; 1486 workt = work; 1487 for ( j=0; j<n; j++ ) { 1488 zb = z + bs*(*idx++); 1489 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1490 workt += bs; 1491 } 1492 } 1493 } 1494 } 1495 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1496 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1497 PLogFlops(2*a->nz*a->bs2 - a->n); 1498 return 0; 1499 } 1500 1501 #undef __FUNC__ 1502 #define __FUNC__ "MatMultTransAdd_SeqBAIJ" 1503 static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1504 { 1505 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1506 Scalar *xg,*zg,*zb; 1507 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1508 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1509 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1510 1511 1512 1513 VecGetArray_Fast(xx,xg); x = xg; 1514 VecGetArray_Fast(zz,zg); z = zg; 1515 1516 if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1517 else PetscMemzero(z,N*sizeof(Scalar)); 1518 1519 1520 idx = a->j; 1521 v = a->a; 1522 ii = a->i; 1523 1524 switch (bs) { 1525 case 1: 1526 for ( i=0; i<mbs; i++ ) { 1527 n = ii[1] - ii[0]; ii++; 1528 xb = x + i; x1 = xb[0]; 1529 ib = idx + ai[i]; 1530 for ( j=0; j<n; j++ ) { 1531 rval = ib[j]; 1532 z[rval] += *v++ * x1; 1533 } 1534 } 1535 break; 1536 case 2: 1537 for ( i=0; i<mbs; i++ ) { 1538 n = ii[1] - ii[0]; ii++; 1539 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1540 ib = idx + ai[i]; 1541 for ( j=0; j<n; j++ ) { 1542 rval = ib[j]*2; 1543 z[rval++] += v[0]*x1 + v[1]*x2; 1544 z[rval++] += v[2]*x1 + v[3]*x2; 1545 v += 4; 1546 } 1547 } 1548 break; 1549 case 3: 1550 for ( i=0; i<mbs; i++ ) { 1551 n = ii[1] - ii[0]; ii++; 1552 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1553 ib = idx + ai[i]; 1554 for ( j=0; j<n; j++ ) { 1555 rval = ib[j]*3; 1556 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1557 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1558 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1559 v += 9; 1560 } 1561 } 1562 break; 1563 case 4: 1564 for ( i=0; i<mbs; i++ ) { 1565 n = ii[1] - ii[0]; ii++; 1566 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1567 ib = idx + ai[i]; 1568 for ( j=0; j<n; j++ ) { 1569 rval = ib[j]*4; 1570 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1571 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1572 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1573 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1574 v += 16; 1575 } 1576 } 1577 break; 1578 case 5: 1579 for ( i=0; i<mbs; i++ ) { 1580 n = ii[1] - ii[0]; ii++; 1581 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1582 x4 = xb[3]; x5 = xb[4]; 1583 ib = idx + ai[i]; 1584 for ( j=0; j<n; j++ ) { 1585 rval = ib[j]*5; 1586 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1587 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1588 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1589 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1590 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1591 v += 25; 1592 } 1593 } 1594 break; 1595 /* block sizes larger then 5 by 5 are handled by BLAS */ 1596 default: { 1597 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1598 if (!a->mult_work) { 1599 k = PetscMax(a->m,a->n); 1600 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1601 CHKPTRQ(a->mult_work); 1602 } 1603 work = a->mult_work; 1604 for ( i=0; i<mbs; i++ ) { 1605 n = ii[1] - ii[0]; ii++; 1606 ncols = n*bs; 1607 PetscMemzero(work,ncols*sizeof(Scalar)); 1608 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1609 v += n*bs2; 1610 x += bs; 1611 workt = work; 1612 for ( j=0; j<n; j++ ) { 1613 zb = z + bs*(*idx++); 1614 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1615 workt += bs; 1616 } 1617 } 1618 } 1619 } 1620 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1621 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1622 PLogFlops(2*a->nz*a->bs2); 1623 return 0; 1624 } 1625 1626 #undef __FUNC__ 1627 #define __FUNC__ "MatGetInfo_SeqBAIJ" 1628 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info) 1629 { 1630 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1631 1632 info->rows_global = (double)a->m; 1633 info->columns_global = (double)a->n; 1634 info->rows_local = (double)a->m; 1635 info->columns_local = (double)a->n; 1636 info->block_size = a->bs2; 1637 info->nz_allocated = a->maxnz; 1638 info->nz_used = a->bs2*a->nz; 1639 info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 1640 /* if (info->nz_unneeded != A->info.nz_unneeded) 1641 printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */ 1642 info->assemblies = A->num_ass; 1643 info->mallocs = a->reallocs; 1644 info->memory = A->mem; 1645 if (A->factor) { 1646 info->fill_ratio_given = A->info.fill_ratio_given; 1647 info->fill_ratio_needed = A->info.fill_ratio_needed; 1648 info->factor_mallocs = A->info.factor_mallocs; 1649 } else { 1650 info->fill_ratio_given = 0; 1651 info->fill_ratio_needed = 0; 1652 info->factor_mallocs = 0; 1653 } 1654 return 0; 1655 } 1656 1657 #undef __FUNC__ 1658 #define __FUNC__ "MatEqual_SeqBAIJ" 1659 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 1660 { 1661 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 1662 1663 if (B->type !=MATSEQBAIJ)SETERRQ(1,0,"Matrices must be same type"); 1664 1665 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1666 if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 1667 (a->nz != b->nz)) { 1668 *flg = PETSC_FALSE; return 0; 1669 } 1670 1671 /* if the a->i are the same */ 1672 if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 1673 *flg = PETSC_FALSE; return 0; 1674 } 1675 1676 /* if a->j are the same */ 1677 if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 1678 *flg = PETSC_FALSE; return 0; 1679 } 1680 1681 /* if a->a are the same */ 1682 if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 1683 *flg = PETSC_FALSE; return 0; 1684 } 1685 *flg = PETSC_TRUE; 1686 return 0; 1687 1688 } 1689 1690 #undef __FUNC__ 1691 #define __FUNC__ "MatGetDiagonal_SeqBAIJ" 1692 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 1693 { 1694 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1695 int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 1696 Scalar *x, zero = 0.0,*aa,*aa_j; 1697 1698 if (A->factor) SETERRQ(1,0,"Not for factored matrix"); 1699 bs = a->bs; 1700 aa = a->a; 1701 ai = a->i; 1702 aj = a->j; 1703 ambs = a->mbs; 1704 bs2 = a->bs2; 1705 1706 VecSet(&zero,v); 1707 VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n); 1708 if (n != a->m) SETERRQ(1,0,"Nonconforming matrix and vector"); 1709 for ( i=0; i<ambs; i++ ) { 1710 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1711 if (aj[j] == i) { 1712 row = i*bs; 1713 aa_j = aa+j*bs2; 1714 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 1715 break; 1716 } 1717 } 1718 } 1719 return 0; 1720 } 1721 1722 #undef __FUNC__ 1723 #define __FUNC__ "MatDiagonalScale_SeqBAIJ" 1724 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 1725 { 1726 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1727 Scalar *l,*r,x,*v,*aa,*li,*ri; 1728 int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 1729 1730 ai = a->i; 1731 aj = a->j; 1732 aa = a->a; 1733 m = a->m; 1734 n = a->n; 1735 bs = a->bs; 1736 mbs = a->mbs; 1737 bs2 = a->bs2; 1738 if (ll) { 1739 VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm); 1740 if (lm != m) SETERRQ(1,0,"Left scaling vector wrong length"); 1741 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1742 M = ai[i+1] - ai[i]; 1743 li = l + i*bs; 1744 v = aa + bs2*ai[i]; 1745 for ( j=0; j<M; j++ ) { /* for each block */ 1746 for ( k=0; k<bs2; k++ ) { 1747 (*v++) *= li[k%bs]; 1748 } 1749 } 1750 } 1751 } 1752 1753 if (rr) { 1754 VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn); 1755 if (rn != n) SETERRQ(1,0,"Right scaling vector wrong length"); 1756 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1757 M = ai[i+1] - ai[i]; 1758 v = aa + bs2*ai[i]; 1759 for ( j=0; j<M; j++ ) { /* for each block */ 1760 ri = r + bs*aj[ai[i]+j]; 1761 for ( k=0; k<bs; k++ ) { 1762 x = ri[k]; 1763 for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 1764 } 1765 } 1766 } 1767 } 1768 return 0; 1769 } 1770 1771 1772 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 1773 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 1774 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1775 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*); 1776 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**); 1777 1778 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 1779 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 1780 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 1781 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 1782 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 1783 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 1784 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec); 1785 1786 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 1787 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 1788 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 1789 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 1790 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 1791 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 1792 1793 #undef __FUNC__ 1794 #define __FUNC__ "MatNorm_SeqBAIJ" 1795 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 1796 { 1797 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1798 Scalar *v = a->a; 1799 double sum = 0.0; 1800 int i,nz=a->nz,bs2=a->bs2; 1801 1802 if (type == NORM_FROBENIUS) { 1803 for (i=0; i< bs2*nz; i++ ) { 1804 #if defined(PETSC_COMPLEX) 1805 sum += real(conj(*v)*(*v)); v++; 1806 #else 1807 sum += (*v)*(*v); v++; 1808 #endif 1809 } 1810 *norm = sqrt(sum); 1811 } 1812 else { 1813 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 1814 } 1815 return 0; 1816 } 1817 1818 /* 1819 note: This can only work for identity for row and col. It would 1820 be good to check this and otherwise generate an error. 1821 */ 1822 #undef __FUNC__ 1823 #define __FUNC__ "MatILUFactor_SeqBAIJ" 1824 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 1825 { 1826 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1827 Mat outA; 1828 int ierr; 1829 1830 if (fill != 0) SETERRQ(1,0,"Only fill=0 supported"); 1831 1832 outA = inA; 1833 inA->factor = FACTOR_LU; 1834 a->row = row; 1835 a->col = col; 1836 1837 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1838 1839 if (!a->diag) { 1840 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 1841 } 1842 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 1843 return 0; 1844 } 1845 1846 #undef __FUNC__ 1847 #define __FUNC__ "MatScale_SeqBAIJ" 1848 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 1849 { 1850 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1851 int one = 1, totalnz = a->bs2*a->nz; 1852 BLscal_( &totalnz, alpha, a->a, &one ); 1853 PLogFlops(totalnz); 1854 return 0; 1855 } 1856 1857 #undef __FUNC__ 1858 #define __FUNC__ "MatGetValues_SeqBAIJ" 1859 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 1860 { 1861 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1862 int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 1863 int *ai = a->i, *ailen = a->ilen; 1864 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 1865 Scalar *ap, *aa = a->a, zero = 0.0; 1866 1867 for ( k=0; k<m; k++ ) { /* loop over rows */ 1868 row = im[k]; brow = row/bs; 1869 if (row < 0) SETERRQ(1,0,"Negative row"); 1870 if (row >= a->m) SETERRQ(1,0,"Row too large"); 1871 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1872 nrow = ailen[brow]; 1873 for ( l=0; l<n; l++ ) { /* loop over columns */ 1874 if (in[l] < 0) SETERRQ(1,0,"Negative column"); 1875 if (in[l] >= a->n) SETERRQ(1,0,"Column too large"); 1876 col = in[l] ; 1877 bcol = col/bs; 1878 cidx = col%bs; 1879 ridx = row%bs; 1880 high = nrow; 1881 low = 0; /* assume unsorted */ 1882 while (high-low > 5) { 1883 t = (low+high)/2; 1884 if (rp[t] > bcol) high = t; 1885 else low = t; 1886 } 1887 for ( i=low; i<high; i++ ) { 1888 if (rp[i] > bcol) break; 1889 if (rp[i] == bcol) { 1890 *v++ = ap[bs2*i+bs*cidx+ridx]; 1891 goto finished; 1892 } 1893 } 1894 *v++ = zero; 1895 finished:; 1896 } 1897 } 1898 return 0; 1899 } 1900 1901 #undef __FUNC__ 1902 #define __FUNC__ "MatGetBlockSize_SeqBAIJ" 1903 static int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs) 1904 { 1905 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data; 1906 *bs = baij->bs; 1907 return 0; 1908 } 1909 1910 /* idx should be of length atlease bs */ 1911 #undef __FUNC__ 1912 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Block" 1913 static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg) 1914 { 1915 int i,row; 1916 row = idx[0]; 1917 if (row%bs!=0) { *flg = PETSC_FALSE; return 0; } 1918 1919 for ( i=1; i<bs; i++ ) { 1920 if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; } 1921 } 1922 *flg = PETSC_TRUE; 1923 return 0; 1924 } 1925 1926 #undef __FUNC__ 1927 #define __FUNC__ "MatZeroRows_SeqBAIJ" 1928 static int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag) 1929 { 1930 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1931 IS is_local; 1932 int ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2; 1933 PetscTruth flg; 1934 Scalar zero = 0.0,*aa; 1935 1936 /* Make a copy of the IS and sort it */ 1937 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 1938 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1939 ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr); 1940 ierr = ISSort(is_local); CHKERRQ(ierr); 1941 ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr); 1942 1943 i = 0; 1944 while (i < is_n) { 1945 if (rows[i]<0 || rows[i]>m) SETERRQ(1,0,"row out of range"); 1946 flg = PETSC_FALSE; 1947 if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); } 1948 if (flg) { /* There exists a block of rows to be Zerowed */ 1949 baij->ilen[rows[i]/bs] = 0; 1950 i += bs; 1951 } else { /* Zero out only the requested row */ 1952 count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs; 1953 aa = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs); 1954 for ( j=0; j<count; j++ ) { 1955 aa[0] = zero; 1956 aa+=bs; 1957 } 1958 i++; 1959 } 1960 } 1961 if (diag) { 1962 for ( j=0; j<is_n; j++ ) { 1963 ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1964 } 1965 } 1966 ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr); 1967 ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr); 1968 ierr = ISDestroy(is_local); CHKERRQ(ierr); 1969 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1970 1971 return 0; 1972 } 1973 1974 #undef __FUNC__ 1975 #define __FUNC__ "MatPrintHelp_SeqBAIJ" 1976 int MatPrintHelp_SeqBAIJ(Mat A) 1977 { 1978 static int called = 0; 1979 MPI_Comm comm = A->comm; 1980 1981 if (called) return 0; else called = 1; 1982 PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n"); 1983 PetscPrintf(comm," -mat_block_size <block_size>\n"); 1984 return 0; 1985 } 1986 1987 /* -------------------------------------------------------------------*/ 1988 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 1989 MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 1990 MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N, 1991 MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ, 1992 MatSolve_SeqBAIJ_N,0, 1993 0,0, 1994 MatLUFactor_SeqBAIJ,0, 1995 0, 1996 MatTranspose_SeqBAIJ, 1997 MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 1998 MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 1999 0,MatAssemblyEnd_SeqBAIJ, 2000 0, 2001 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ, 2002 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 2003 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 2004 MatILUFactorSymbolic_SeqBAIJ,0, 2005 0,0, 2006 MatConvertSameType_SeqBAIJ,0,0, 2007 MatILUFactor_SeqBAIJ,0,0, 2008 MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ, 2009 MatGetValues_SeqBAIJ,0, 2010 MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 2011 0,0,0,MatGetBlockSize_SeqBAIJ, 2012 MatGetRowIJ_SeqBAIJ, 2013 MatRestoreRowIJ_SeqBAIJ, 2014 0,0,0,0,0,0, 2015 MatSetValuesBlocked_SeqBAIJ}; 2016 2017 #undef __FUNC__ 2018 #define __FUNC__ "MatCreateSeqBAIJ" 2019 /*@C 2020 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2021 compressed row) format. For good matrix assembly performance the 2022 user should preallocate the matrix storage by setting the parameter nz 2023 (or the array nzz). By setting these parameters accurately, performance 2024 during matrix assembly can be increased by more than a factor of 50. 2025 2026 Input Parameters: 2027 . comm - MPI communicator, set to MPI_COMM_SELF 2028 . bs - size of block 2029 . m - number of rows 2030 . n - number of columns 2031 . nz - number of block nonzeros per block row (same for all rows) 2032 . nzz - array containing the number of block nonzeros in the various block rows 2033 (possibly different for each block row) or PETSC_NULL 2034 2035 Output Parameter: 2036 . A - the matrix 2037 2038 Options Database Keys: 2039 $ -mat_no_unroll - uses code that does not unroll the loops in the 2040 $ block calculations (much solver) 2041 $ -mat_block_size - size of the blocks to use 2042 2043 Notes: 2044 The block AIJ format is fully compatible with standard Fortran 77 2045 storage. That is, the stored row and column indices can begin at 2046 either one (as in Fortran) or zero. See the users' manual for details. 2047 2048 Specify the preallocated storage with either nz or nnz (not both). 2049 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2050 allocation. For additional details, see the users manual chapter on 2051 matrices. 2052 2053 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2054 @*/ 2055 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 2056 { 2057 Mat B; 2058 Mat_SeqBAIJ *b; 2059 int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size; 2060 2061 MPI_Comm_size(comm,&size); 2062 if (size > 1) SETERRQ(1,0,"Comm must be of size 1"); 2063 2064 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr); 2065 2066 if (mbs*bs!=m || nbs*bs!=n) 2067 SETERRQ(1,0,"Number rows, cols must be divisible by blocksize"); 2068 2069 *A = 0; 2070 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 2071 PLogObjectCreate(B); 2072 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 2073 PetscMemzero(b,sizeof(Mat_SeqBAIJ)); 2074 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 2075 ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr); 2076 if (!flg) { 2077 switch (bs) { 2078 case 1: 2079 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2080 B->ops.solve = MatSolve_SeqBAIJ_1; 2081 B->ops.mult = MatMult_SeqBAIJ_1; 2082 B->ops.multadd = MatMultAdd_SeqBAIJ_1; 2083 break; 2084 case 2: 2085 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2086 B->ops.solve = MatSolve_SeqBAIJ_2; 2087 B->ops.mult = MatMult_SeqBAIJ_2; 2088 B->ops.multadd = MatMultAdd_SeqBAIJ_2; 2089 break; 2090 case 3: 2091 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2092 B->ops.solve = MatSolve_SeqBAIJ_3; 2093 B->ops.mult = MatMult_SeqBAIJ_3; 2094 B->ops.multadd = MatMultAdd_SeqBAIJ_3; 2095 break; 2096 case 4: 2097 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2098 B->ops.solve = MatSolve_SeqBAIJ_4; 2099 B->ops.mult = MatMult_SeqBAIJ_4; 2100 B->ops.multadd = MatMultAdd_SeqBAIJ_4; 2101 break; 2102 case 5: 2103 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2104 B->ops.solve = MatSolve_SeqBAIJ_5; 2105 B->ops.mult = MatMult_SeqBAIJ_5; 2106 B->ops.multadd = MatMultAdd_SeqBAIJ_5; 2107 break; 2108 case 7: 2109 B->ops.mult = MatMult_SeqBAIJ_7; 2110 B->ops.solve = MatSolve_SeqBAIJ_7; 2111 B->ops.multadd = MatMultAdd_SeqBAIJ_7; 2112 break; 2113 } 2114 } 2115 B->destroy = MatDestroy_SeqBAIJ; 2116 B->view = MatView_SeqBAIJ; 2117 B->factor = 0; 2118 B->lupivotthreshold = 1.0; 2119 B->mapping = 0; 2120 b->row = 0; 2121 b->col = 0; 2122 b->reallocs = 0; 2123 2124 b->m = m; B->m = m; B->M = m; 2125 b->n = n; B->n = n; B->N = n; 2126 b->mbs = mbs; 2127 b->nbs = nbs; 2128 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 2129 if (nnz == PETSC_NULL) { 2130 if (nz == PETSC_DEFAULT) nz = 5; 2131 else if (nz <= 0) nz = 1; 2132 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 2133 nz = nz*mbs; 2134 } 2135 else { 2136 nz = 0; 2137 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 2138 } 2139 2140 /* allocate the matrix space */ 2141 len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 2142 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 2143 PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 2144 b->j = (int *) (b->a + nz*bs2); 2145 PetscMemzero(b->j,nz*sizeof(int)); 2146 b->i = b->j + nz; 2147 b->singlemalloc = 1; 2148 2149 b->i[0] = 0; 2150 for (i=1; i<mbs+1; i++) { 2151 b->i[i] = b->i[i-1] + b->imax[i-1]; 2152 } 2153 2154 /* b->ilen will count nonzeros in each block row so far. */ 2155 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 2156 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 2157 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 2158 2159 b->bs = bs; 2160 b->bs2 = bs2; 2161 b->mbs = mbs; 2162 b->nz = 0; 2163 b->maxnz = nz*bs2; 2164 b->sorted = 0; 2165 b->roworiented = 1; 2166 b->nonew = 0; 2167 b->diag = 0; 2168 b->solve_work = 0; 2169 b->mult_work = 0; 2170 b->spptr = 0; 2171 B->info.nz_unneeded = (double)b->maxnz; 2172 2173 *A = B; 2174 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 2175 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 2176 return 0; 2177 } 2178 2179 #undef __FUNC__ 2180 #define __FUNC__ "MatConvertSameType_SeqBAIJ" 2181 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 2182 { 2183 Mat C; 2184 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 2185 int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 2186 2187 if (a->i[mbs] != nz) SETERRQ(1,0,"Corrupt matrix"); 2188 2189 *B = 0; 2190 PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 2191 PLogObjectCreate(C); 2192 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 2193 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 2194 C->destroy = MatDestroy_SeqBAIJ; 2195 C->view = MatView_SeqBAIJ; 2196 C->factor = A->factor; 2197 c->row = 0; 2198 c->col = 0; 2199 C->assembled = PETSC_TRUE; 2200 2201 c->m = C->m = a->m; 2202 c->n = C->n = a->n; 2203 C->M = a->m; 2204 C->N = a->n; 2205 2206 c->bs = a->bs; 2207 c->bs2 = a->bs2; 2208 c->mbs = a->mbs; 2209 c->nbs = a->nbs; 2210 2211 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 2212 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 2213 for ( i=0; i<mbs; i++ ) { 2214 c->imax[i] = a->imax[i]; 2215 c->ilen[i] = a->ilen[i]; 2216 } 2217 2218 /* allocate the matrix space */ 2219 c->singlemalloc = 1; 2220 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 2221 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 2222 c->j = (int *) (c->a + nz*bs2); 2223 c->i = c->j + nz; 2224 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 2225 if (mbs > 0) { 2226 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 2227 if (cpvalues == COPY_VALUES) { 2228 PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 2229 } 2230 } 2231 2232 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 2233 c->sorted = a->sorted; 2234 c->roworiented = a->roworiented; 2235 c->nonew = a->nonew; 2236 2237 if (a->diag) { 2238 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 2239 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 2240 for ( i=0; i<mbs; i++ ) { 2241 c->diag[i] = a->diag[i]; 2242 } 2243 } 2244 else c->diag = 0; 2245 c->nz = a->nz; 2246 c->maxnz = a->maxnz; 2247 c->solve_work = 0; 2248 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 2249 c->mult_work = 0; 2250 *B = C; 2251 return 0; 2252 } 2253 2254 #undef __FUNC__ 2255 #define __FUNC__ "MatLoad_SeqBAIJ" 2256 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 2257 { 2258 Mat_SeqBAIJ *a; 2259 Mat B; 2260 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 2261 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2262 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 2263 int *masked, nmask,tmp,bs2,ishift; 2264 Scalar *aa; 2265 MPI_Comm comm = ((PetscObject) viewer)->comm; 2266 2267 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2268 bs2 = bs*bs; 2269 2270 MPI_Comm_size(comm,&size); 2271 if (size > 1) SETERRQ(1,0,"view must have one processor"); 2272 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2273 ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 2274 if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not Mat object"); 2275 M = header[1]; N = header[2]; nz = header[3]; 2276 2277 if (M != N) SETERRQ(1,0,"Can only do square matrices"); 2278 2279 /* 2280 This code adds extra rows to make sure the number of rows is 2281 divisible by the blocksize 2282 */ 2283 mbs = M/bs; 2284 extra_rows = bs - M + bs*(mbs); 2285 if (extra_rows == bs) extra_rows = 0; 2286 else mbs++; 2287 if (extra_rows) { 2288 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 2289 } 2290 2291 /* read in row lengths */ 2292 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 2293 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 2294 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 2295 2296 /* read in column indices */ 2297 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 2298 ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 2299 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 2300 2301 /* loop over row lengths determining block row lengths */ 2302 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 2303 PetscMemzero(browlengths,mbs*sizeof(int)); 2304 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 2305 PetscMemzero(mask,mbs*sizeof(int)); 2306 masked = mask + mbs; 2307 rowcount = 0; nzcount = 0; 2308 for ( i=0; i<mbs; i++ ) { 2309 nmask = 0; 2310 for ( j=0; j<bs; j++ ) { 2311 kmax = rowlengths[rowcount]; 2312 for ( k=0; k<kmax; k++ ) { 2313 tmp = jj[nzcount++]/bs; 2314 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2315 } 2316 rowcount++; 2317 } 2318 browlengths[i] += nmask; 2319 /* zero out the mask elements we set */ 2320 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2321 } 2322 2323 /* create our matrix */ 2324 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 2325 CHKERRQ(ierr); 2326 B = *A; 2327 a = (Mat_SeqBAIJ *) B->data; 2328 2329 /* set matrix "i" values */ 2330 a->i[0] = 0; 2331 for ( i=1; i<= mbs; i++ ) { 2332 a->i[i] = a->i[i-1] + browlengths[i-1]; 2333 a->ilen[i-1] = browlengths[i-1]; 2334 } 2335 a->nz = 0; 2336 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 2337 2338 /* read in nonzero values */ 2339 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 2340 ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 2341 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 2342 2343 /* set "a" and "j" values into matrix */ 2344 nzcount = 0; jcount = 0; 2345 for ( i=0; i<mbs; i++ ) { 2346 nzcountb = nzcount; 2347 nmask = 0; 2348 for ( j=0; j<bs; j++ ) { 2349 kmax = rowlengths[i*bs+j]; 2350 for ( k=0; k<kmax; k++ ) { 2351 tmp = jj[nzcount++]/bs; 2352 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2353 } 2354 rowcount++; 2355 } 2356 /* sort the masked values */ 2357 PetscSortInt(nmask,masked); 2358 2359 /* set "j" values into matrix */ 2360 maskcount = 1; 2361 for ( j=0; j<nmask; j++ ) { 2362 a->j[jcount++] = masked[j]; 2363 mask[masked[j]] = maskcount++; 2364 } 2365 /* set "a" values into matrix */ 2366 ishift = bs2*a->i[i]; 2367 for ( j=0; j<bs; j++ ) { 2368 kmax = rowlengths[i*bs+j]; 2369 for ( k=0; k<kmax; k++ ) { 2370 tmp = jj[nzcountb]/bs ; 2371 block = mask[tmp] - 1; 2372 point = jj[nzcountb] - bs*tmp; 2373 idx = ishift + bs2*block + j + bs*point; 2374 a->a[idx] = aa[nzcountb++]; 2375 } 2376 } 2377 /* zero out the mask elements we set */ 2378 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2379 } 2380 if (jcount != a->nz) SETERRQ(1,0,"Error bad binary matrix"); 2381 2382 PetscFree(rowlengths); 2383 PetscFree(browlengths); 2384 PetscFree(aa); 2385 PetscFree(jj); 2386 PetscFree(mask); 2387 2388 B->assembled = PETSC_TRUE; 2389 2390 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 2391 if (flg) { 2392 Viewer tviewer; 2393 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2394 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO,0);CHKERRQ(ierr); 2395 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2396 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2397 } 2398 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 2399 if (flg) { 2400 Viewer tviewer; 2401 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2402 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO_LONG,0);CHKERRQ(ierr); 2403 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2404 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2405 } 2406 ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 2407 if (flg) { 2408 Viewer tviewer; 2409 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2410 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2411 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2412 } 2413 ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 2414 if (flg) { 2415 Viewer tviewer; 2416 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2417 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_MATLAB,"M");CHKERRQ(ierr); 2418 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2419 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2420 } 2421 ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 2422 if (flg) { 2423 Viewer tviewer; 2424 ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 2425 ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 2426 ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 2427 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2428 } 2429 return 0; 2430 } 2431 2432 2433 2434