1 #ifndef lint 2 static char vcid[] = "$Id: baij.c,v 1.95 1997/03/26 01:36:13 bsmith Exp balay $"; 3 #endif 4 5 /* 6 Defines the basic matrix operations for the BAIJ (compressed row) 7 matrix storage format. 8 */ 9 10 #include "pinclude/pviewer.h" 11 #include "sys.h" 12 #include "src/mat/impls/baij/seq/baij.h" 13 #include "src/vec/vecimpl.h" 14 #include "src/inline/spops.h" 15 #include "petsc.h" 16 17 18 /* 19 Adds diagonal pointers to sparse matrix structure. 20 */ 21 22 #undef __FUNC__ 23 #define __FUNC__ "MatMarkDiag_SeqBAIJ" 24 int MatMarkDiag_SeqBAIJ(Mat A) 25 { 26 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 27 int i,j, *diag, m = a->mbs; 28 29 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 30 PLogObjectMemory(A,(m+1)*sizeof(int)); 31 for ( i=0; i<m; i++ ) { 32 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 33 if (a->j[j] == i) { 34 diag[i] = j; 35 break; 36 } 37 } 38 } 39 a->diag = diag; 40 return 0; 41 } 42 43 44 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 45 46 #undef __FUNC__ 47 #define __FUNC__ "MatGetRowIJ_SeqBAIJ" 48 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 49 PetscTruth *done) 50 { 51 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 52 int ierr, n = a->mbs,i; 53 54 *nn = n; 55 if (!ia) return 0; 56 if (symmetric) { 57 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 58 } else if (oshift == 1) { 59 /* temporarily add 1 to i and j indices */ 60 int nz = a->i[n] + 1; 61 for ( i=0; i<nz; i++ ) a->j[i]++; 62 for ( i=0; i<n+1; i++ ) a->i[i]++; 63 *ia = a->i; *ja = a->j; 64 } else { 65 *ia = a->i; *ja = a->j; 66 } 67 68 return 0; 69 } 70 71 #undef __FUNC__ 72 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ" /* ADIC Ignore */ 73 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 74 PetscTruth *done) 75 { 76 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 77 int i,n = a->mbs; 78 79 if (!ia) return 0; 80 if (symmetric) { 81 PetscFree(*ia); 82 PetscFree(*ja); 83 } else if (oshift == 1) { 84 int nz = a->i[n]; 85 for ( i=0; i<nz; i++ ) a->j[i]--; 86 for ( i=0; i<n+1; i++ ) a->i[i]--; 87 } 88 return 0; 89 } 90 91 92 #undef __FUNC__ 93 #define __FUNC__ "MatView_SeqBAIJ_Binary" /* ADIC Ignore */ 94 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 95 { 96 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 97 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 98 Scalar *aa; 99 100 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 101 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 102 col_lens[0] = MAT_COOKIE; 103 104 col_lens[1] = a->m; 105 col_lens[2] = a->n; 106 col_lens[3] = a->nz*bs2; 107 108 /* store lengths of each row and write (including header) to file */ 109 count = 0; 110 for ( i=0; i<a->mbs; i++ ) { 111 for ( j=0; j<bs; j++ ) { 112 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 113 } 114 } 115 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 116 PetscFree(col_lens); 117 118 /* store column indices (zero start index) */ 119 jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj); 120 count = 0; 121 for ( i=0; i<a->mbs; i++ ) { 122 for ( j=0; j<bs; j++ ) { 123 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 124 for ( l=0; l<bs; l++ ) { 125 jj[count++] = bs*a->j[k] + l; 126 } 127 } 128 } 129 } 130 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr); 131 PetscFree(jj); 132 133 /* store nonzero values */ 134 aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa); 135 count = 0; 136 for ( i=0; i<a->mbs; i++ ) { 137 for ( j=0; j<bs; j++ ) { 138 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 139 for ( l=0; l<bs; l++ ) { 140 aa[count++] = a->a[bs2*k + l*bs + j]; 141 } 142 } 143 } 144 } 145 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 146 PetscFree(aa); 147 return 0; 148 } 149 150 #undef __FUNC__ 151 #define __FUNC__ "MatView_SeqBAIJ_ASCII" /* ADIC Ignore */ 152 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 153 { 154 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 155 int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 156 FILE *fd; 157 char *outputname; 158 159 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 160 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 161 ierr = ViewerGetFormat(viewer,&format); 162 if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) { 163 fprintf(fd," block size is %d\n",bs); 164 } 165 else if (format == VIEWER_FORMAT_ASCII_MATLAB) { 166 SETERRQ(1,0,"Matlab format not supported"); 167 } 168 else if (format == VIEWER_FORMAT_ASCII_COMMON) { 169 for ( i=0; i<a->mbs; i++ ) { 170 for ( j=0; j<bs; j++ ) { 171 fprintf(fd,"row %d:",i*bs+j); 172 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 173 for ( l=0; l<bs; l++ ) { 174 #if defined(PETSC_COMPLEX) 175 if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 176 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 177 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 178 else if (real(a->a[bs2*k + l*bs + j]) != 0.0) 179 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 180 #else 181 if (a->a[bs2*k + l*bs + j] != 0.0) 182 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 183 #endif 184 } 185 } 186 fprintf(fd,"\n"); 187 } 188 } 189 } 190 else { 191 for ( i=0; i<a->mbs; i++ ) { 192 for ( j=0; j<bs; j++ ) { 193 fprintf(fd,"row %d:",i*bs+j); 194 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 195 for ( l=0; l<bs; l++ ) { 196 #if defined(PETSC_COMPLEX) 197 if (imag(a->a[bs2*k + l*bs + j]) != 0.0) { 198 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 199 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 200 } 201 else { 202 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 203 } 204 #else 205 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 206 #endif 207 } 208 } 209 fprintf(fd,"\n"); 210 } 211 } 212 } 213 fflush(fd); 214 return 0; 215 } 216 217 #undef __FUNC__ 218 #define __FUNC__ "MatView_SeqBAIJ_Draw" /* ADIC Ignore */ 219 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer) 220 { 221 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 222 int row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2; 223 double xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r; 224 Scalar *aa; 225 Draw draw; 226 DrawButton button; 227 PetscTruth isnull; 228 229 ViewerDrawGetDraw(viewer,&draw); 230 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 231 232 xr = a->n; yr = a->m; h = yr/10.0; w = xr/10.0; 233 xr += w; yr += h; xl = -w; yl = -h; 234 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 235 /* loop over matrix elements drawing boxes */ 236 color = DRAW_BLUE; 237 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 238 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 239 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 240 x_l = a->j[j]*bs; x_r = x_l + 1.0; 241 aa = a->a + j*bs2; 242 for ( k=0; k<bs; k++ ) { 243 for ( l=0; l<bs; l++ ) { 244 if (PetscReal(*aa++) >= 0.) continue; 245 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 246 } 247 } 248 } 249 } 250 color = DRAW_CYAN; 251 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 252 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 253 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 254 x_l = a->j[j]*bs; x_r = x_l + 1.0; 255 aa = a->a + j*bs2; 256 for ( k=0; k<bs; k++ ) { 257 for ( l=0; l<bs; l++ ) { 258 if (PetscReal(*aa++) != 0.) continue; 259 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 260 } 261 } 262 } 263 } 264 265 color = DRAW_RED; 266 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 267 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 268 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 269 x_l = a->j[j]*bs; x_r = x_l + 1.0; 270 aa = a->a + j*bs2; 271 for ( k=0; k<bs; k++ ) { 272 for ( l=0; l<bs; l++ ) { 273 if (PetscReal(*aa++) <= 0.) continue; 274 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 275 } 276 } 277 } 278 } 279 280 DrawFlush(draw); 281 DrawGetPause(draw,&pause); 282 if (pause >= 0) { PetscSleep(pause); return 0;} 283 284 /* allow the matrix to zoom or shrink */ 285 ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 286 while (button != BUTTON_RIGHT) { 287 DrawClear(draw); 288 if (button == BUTTON_LEFT) scale = .5; 289 else if (button == BUTTON_CENTER) scale = 2.; 290 xl = scale*(xl + w - xc) + xc - w*scale; 291 xr = scale*(xr - w - xc) + xc + w*scale; 292 yl = scale*(yl + h - yc) + yc - h*scale; 293 yr = scale*(yr - h - yc) + yc + h*scale; 294 w *= scale; h *= scale; 295 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 296 color = DRAW_BLUE; 297 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 298 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 299 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 300 x_l = a->j[j]*bs; x_r = x_l + 1.0; 301 aa = a->a + j*bs2; 302 for ( k=0; k<bs; k++ ) { 303 for ( l=0; l<bs; l++ ) { 304 if (PetscReal(*aa++) >= 0.) continue; 305 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 306 } 307 } 308 } 309 } 310 color = DRAW_CYAN; 311 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 312 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 313 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 314 x_l = a->j[j]*bs; x_r = x_l + 1.0; 315 aa = a->a + j*bs2; 316 for ( k=0; k<bs; k++ ) { 317 for ( l=0; l<bs; l++ ) { 318 if (PetscReal(*aa++) != 0.) continue; 319 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 320 } 321 } 322 } 323 } 324 325 color = DRAW_RED; 326 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 327 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 328 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 329 x_l = a->j[j]*bs; x_r = x_l + 1.0; 330 aa = a->a + j*bs2; 331 for ( k=0; k<bs; k++ ) { 332 for ( l=0; l<bs; l++ ) { 333 if (PetscReal(*aa++) <= 0.) continue; 334 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 335 } 336 } 337 } 338 } 339 ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 340 } 341 return 0; 342 } 343 344 #undef __FUNC__ 345 #define __FUNC__ "MatView_SeqBAIJ" /* ADIC Ignore */ 346 int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 347 { 348 Mat A = (Mat) obj; 349 ViewerType vtype; 350 int ierr; 351 352 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 353 if (vtype == MATLAB_VIEWER) { 354 SETERRQ(1,0,"Matlab viewer not supported"); 355 } 356 else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 357 return MatView_SeqBAIJ_ASCII(A,viewer); 358 } 359 else if (vtype == BINARY_FILE_VIEWER) { 360 return MatView_SeqBAIJ_Binary(A,viewer); 361 } 362 else if (vtype == DRAW_VIEWER) { 363 return MatView_SeqBAIJ_Draw(A,viewer); 364 } 365 return 0; 366 } 367 368 #define CHUNKSIZE 10 369 370 #undef __FUNC__ 371 #define __FUNC__ "MatSetValues_SeqBAIJ" 372 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 373 { 374 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 375 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 376 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 377 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 378 int ridx,cidx,bs2=a->bs2; 379 Scalar *ap,value,*aa=a->a,*bap; 380 381 for ( k=0; k<m; k++ ) { /* loop over added rows */ 382 row = im[k]; brow = row/bs; 383 #if defined(PETSC_BOPT_g) 384 if (row < 0) SETERRQ(1,0,"Negative row"); 385 if (row >= a->m) SETERRQ(1,0,"Row too large"); 386 #endif 387 rp = aj + ai[brow]; 388 ap = aa + bs2*ai[brow]; 389 rmax = imax[brow]; 390 nrow = ailen[brow]; 391 low = 0; 392 for ( l=0; l<n; l++ ) { /* loop over added columns */ 393 #if defined(PETSC_BOPT_g) 394 if (in[l] < 0) SETERRQ(1,0,"Negative column"); 395 if (in[l] >= a->n) SETERRQ(1,0,"Column too large"); 396 #endif 397 col = in[l]; bcol = col/bs; 398 ridx = row % bs; cidx = col % bs; 399 if (roworiented) { 400 value = *v++; 401 } 402 else { 403 value = v[k + l*m]; 404 } 405 if (!sorted) low = 0; high = nrow; 406 while (high-low > 7) { 407 t = (low+high)/2; 408 if (rp[t] > bcol) high = t; 409 else low = t; 410 } 411 for ( i=low; i<high; i++ ) { 412 if (rp[i] > bcol) break; 413 if (rp[i] == bcol) { 414 bap = ap + bs2*i + bs*cidx + ridx; 415 if (is == ADD_VALUES) *bap += value; 416 else *bap = value; 417 goto noinsert1; 418 } 419 } 420 if (nonew == 1) goto noinsert1; 421 else if (nonew == -1) SETERRQ(1,1,"Inserting a new nonzero in the matrix"); 422 if (nrow >= rmax) { 423 /* there is no extra room in row, therefore enlarge */ 424 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 425 Scalar *new_a; 426 427 /* malloc new storage space */ 428 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 429 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 430 new_j = (int *) (new_a + bs2*new_nz); 431 new_i = new_j + new_nz; 432 433 /* copy over old data into new slots */ 434 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 435 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 436 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); 437 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 438 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, 439 len*sizeof(int)); 440 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); 441 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 442 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), 443 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); 444 /* free up old matrix storage */ 445 PetscFree(a->a); 446 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 447 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 448 a->singlemalloc = 1; 449 450 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 451 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 452 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 453 a->maxnz += bs2*CHUNKSIZE; 454 a->reallocs++; 455 a->nz++; 456 } 457 N = nrow++ - 1; 458 /* shift up all the later entries in this row */ 459 for ( ii=N; ii>=i; ii-- ) { 460 rp[ii+1] = rp[ii]; 461 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 462 } 463 if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 464 rp[i] = bcol; 465 ap[bs2*i + bs*cidx + ridx] = value; 466 noinsert1:; 467 low = i; 468 } 469 ailen[brow] = nrow; 470 } 471 return 0; 472 } 473 474 #undef __FUNC__ 475 #define __FUNC__ "MatSetValues_SeqBAIJ" 476 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 477 { 478 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 479 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 480 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 481 int roworiented=a->roworiented; 482 int *aj=a->j,nonew=a->nonew; 483 int bs2=a->bs2,bs=a->bs,stepval; 484 Scalar *ap,*value=v,*aa=a->a,*bap; 485 486 if (roworiented) { 487 stepval = (n-1)*bs; 488 } else { 489 stepval = (m-1)*bs; 490 } 491 for ( k=0; k<m; k++ ) { /* loop over added rows */ 492 row = im[k]; 493 #if defined(PETSC_BOPT_g) 494 if (row < 0) SETERRQ(1,0,"Negative row"); 495 if (row >= a->mbs) SETERRQ(1,0,"Row too large"); 496 #endif 497 rp = aj + ai[row]; 498 ap = aa + bs2*ai[row]; 499 rmax = imax[row]; 500 nrow = ailen[row]; 501 low = 0; 502 for ( l=0; l<n; l++ ) { /* loop over added columns */ 503 #if defined(PETSC_BOPT_g) 504 if (in[l] < 0) SETERRQ(1,0,"Negative column"); 505 if (in[l] >= a->nbs) SETERRQ(1,0,"Column too large"); 506 #endif 507 col = in[l]; 508 if (roworiented) { 509 value = v + k*(stepval+bs)*bs + l*bs; 510 } else { 511 value = v + l*(stepval+bs)*bs + k*bs; 512 } 513 if (!sorted) low = 0; high = nrow; 514 while (high-low > 7) { 515 t = (low+high)/2; 516 if (rp[t] > col) high = t; 517 else low = t; 518 } 519 for ( i=low; i<high; i++ ) { 520 if (rp[i] > col) break; 521 if (rp[i] == col) { 522 bap = ap + bs2*i; 523 if (roworiented) { 524 if (is == ADD_VALUES) { 525 for ( ii=0; ii<bs; ii++,value+=stepval ) 526 for (jj=ii; jj<bs2; jj+=bs ) 527 bap[jj] += *value++; 528 } else { 529 for ( ii=0; ii<bs; ii++,value+=stepval ) 530 for (jj=ii; jj<bs2; jj+=bs ) 531 bap[jj] = *value++; 532 } 533 } else { 534 if (is == ADD_VALUES) { 535 for ( ii=0; ii<bs; ii++,value+=stepval ) 536 for (jj=0; jj<bs; jj++ ) 537 *bap++ += *value++; 538 } else { 539 for ( ii=0; ii<bs; ii++,value+=stepval ) 540 for (jj=0; jj<bs; jj++ ) 541 *bap++ = *value++; 542 } 543 } 544 goto noinsert2; 545 } 546 } 547 if (nonew) goto noinsert2; 548 if (nrow >= rmax) { 549 /* there is no extra room in row, therefore enlarge */ 550 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 551 Scalar *new_a; 552 553 /* malloc new storage space */ 554 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 555 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 556 new_j = (int *) (new_a + bs2*new_nz); 557 new_i = new_j + new_nz; 558 559 /* copy over old data into new slots */ 560 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} 561 for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 562 PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int)); 563 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 564 PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow, 565 len*sizeof(int)); 566 PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(Scalar)); 567 PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 568 PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE), 569 aa+bs2*(ai[row]+nrow),bs2*len*sizeof(Scalar)); 570 /* free up old matrix storage */ 571 PetscFree(a->a); 572 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 573 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 574 a->singlemalloc = 1; 575 576 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 577 rmax = imax[row] = imax[row] + CHUNKSIZE; 578 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 579 a->maxnz += bs2*CHUNKSIZE; 580 a->reallocs++; 581 a->nz++; 582 } 583 N = nrow++ - 1; 584 /* shift up all the later entries in this row */ 585 for ( ii=N; ii>=i; ii-- ) { 586 rp[ii+1] = rp[ii]; 587 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 588 } 589 if (N >= i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 590 rp[i] = col; 591 bap = ap + bs2*i; 592 if (roworiented) { 593 for ( ii=0; ii<bs; ii++,value+=stepval) 594 for (jj=ii; jj<bs2; jj+=bs ) 595 bap[jj] = *value++; 596 } else { 597 for ( ii=0; ii<bs; ii++,value+=stepval ) 598 for (jj=0; jj<bs; jj++ ) 599 *bap++ = *value++; 600 } 601 noinsert2:; 602 low = i; 603 } 604 ailen[row] = nrow; 605 } 606 return 0; 607 } 608 609 #undef __FUNC__ 610 #define __FUNC__ "MatGetSize_SeqBAIJ" /* ADIC Ignore */ 611 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 612 { 613 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 614 *m = a->m; *n = a->n; 615 return 0; 616 } 617 618 #undef __FUNC__ 619 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ" /* ADIC Ignore */ 620 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 621 { 622 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 623 *m = 0; *n = a->m; 624 return 0; 625 } 626 627 #undef __FUNC__ 628 #define __FUNC__ "MatGetRow_SeqBAIJ" 629 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 630 { 631 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 632 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 633 Scalar *aa,*v_i,*aa_i; 634 635 bs = a->bs; 636 ai = a->i; 637 aj = a->j; 638 aa = a->a; 639 bs2 = a->bs2; 640 641 if (row < 0 || row >= a->m) SETERRQ(1,0,"Row out of range"); 642 643 bn = row/bs; /* Block number */ 644 bp = row % bs; /* Block Position */ 645 M = ai[bn+1] - ai[bn]; 646 *nz = bs*M; 647 648 if (v) { 649 *v = 0; 650 if (*nz) { 651 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 652 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 653 v_i = *v + i*bs; 654 aa_i = aa + bs2*(ai[bn] + i); 655 for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 656 } 657 } 658 } 659 660 if (idx) { 661 *idx = 0; 662 if (*nz) { 663 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 664 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 665 idx_i = *idx + i*bs; 666 itmp = bs*aj[ai[bn] + i]; 667 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 668 } 669 } 670 } 671 return 0; 672 } 673 674 #undef __FUNC__ 675 #define __FUNC__ "MatRestoreRow_SeqBAIJ" /* ADIC Ignore */ 676 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 677 { 678 if (idx) {if (*idx) PetscFree(*idx);} 679 if (v) {if (*v) PetscFree(*v);} 680 return 0; 681 } 682 683 #undef __FUNC__ 684 #define __FUNC__ "MatTranspose_SeqBAIJ" 685 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 686 { 687 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 688 Mat C; 689 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 690 int *rows,*cols,bs2=a->bs2; 691 Scalar *array=a->a; 692 693 if (B==PETSC_NULL && mbs!=nbs) 694 SETERRQ(1,0,"Square matrix only for in-place"); 695 col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 696 PetscMemzero(col,(1+nbs)*sizeof(int)); 697 698 for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 699 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 700 PetscFree(col); 701 rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 702 cols = rows + bs; 703 for ( i=0; i<mbs; i++ ) { 704 cols[0] = i*bs; 705 for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 706 len = ai[i+1] - ai[i]; 707 for ( j=0; j<len; j++ ) { 708 rows[0] = (*aj++)*bs; 709 for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 710 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 711 array += bs2; 712 } 713 } 714 PetscFree(rows); 715 716 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 717 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 718 719 if (B != PETSC_NULL) { 720 *B = C; 721 } else { 722 /* This isn't really an in-place transpose */ 723 PetscFree(a->a); 724 if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 725 if (a->diag) PetscFree(a->diag); 726 if (a->ilen) PetscFree(a->ilen); 727 if (a->imax) PetscFree(a->imax); 728 if (a->solve_work) PetscFree(a->solve_work); 729 PetscFree(a); 730 PetscMemcpy(A,C,sizeof(struct _Mat)); 731 PetscHeaderDestroy(C); 732 } 733 return 0; 734 } 735 736 737 #undef __FUNC__ 738 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ" 739 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 740 { 741 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 742 int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 743 int m = a->m,*ip, N, *ailen = a->ilen; 744 int mbs = a->mbs, bs2 = a->bs2,rmax; 745 Scalar *aa = a->a, *ap; 746 747 if (mode == MAT_FLUSH_ASSEMBLY) return 0; 748 749 rmax = ailen[0]; 750 for ( i=1; i<mbs; i++ ) { 751 /* move each row back by the amount of empty slots (fshift) before it*/ 752 fshift += imax[i-1] - ailen[i-1]; 753 rmax = PetscMax(rmax,ailen[i]); 754 if (fshift) { 755 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 756 N = ailen[i]; 757 for ( j=0; j<N; j++ ) { 758 ip[j-fshift] = ip[j]; 759 PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 760 } 761 } 762 ai[i] = ai[i-1] + ailen[i-1]; 763 } 764 if (mbs) { 765 fshift += imax[mbs-1] - ailen[mbs-1]; 766 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 767 } 768 /* reset ilen and imax for each row */ 769 for ( i=0; i<mbs; i++ ) { 770 ailen[i] = imax[i] = ai[i+1] - ai[i]; 771 } 772 a->nz = ai[mbs]; 773 774 /* diagonals may have moved, so kill the diagonal pointers */ 775 if (fshift && a->diag) { 776 PetscFree(a->diag); 777 PLogObjectMemory(A,-(m+1)*sizeof(int)); 778 a->diag = 0; 779 } 780 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 781 m,a->n,a->bs,fshift*bs2,a->nz*bs2); 782 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 783 a->reallocs); 784 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 785 a->reallocs = 0; 786 A->info.nz_unneeded = (double)fshift*bs2; 787 788 return 0; 789 } 790 791 #undef __FUNC__ 792 #define __FUNC__ "MatZeroEntries_SeqBAIJ" 793 int MatZeroEntries_SeqBAIJ(Mat A) 794 { 795 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 796 PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar)); 797 return 0; 798 } 799 800 #undef __FUNC__ 801 #define __FUNC__ "MatDestroy_SeqBAIJ" /* ADIC Ignore */ 802 int MatDestroy_SeqBAIJ(PetscObject obj) 803 { 804 Mat A = (Mat) obj; 805 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 806 int ierr; 807 808 #if defined(PETSC_LOG) 809 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 810 #endif 811 PetscFree(a->a); 812 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 813 if (a->diag) PetscFree(a->diag); 814 if (a->ilen) PetscFree(a->ilen); 815 if (a->imax) PetscFree(a->imax); 816 if (a->solve_work) PetscFree(a->solve_work); 817 if (a->mult_work) PetscFree(a->mult_work); 818 PetscFree(a); 819 if (A->mapping) { 820 ierr = ISLocalToGlobalMappingDestroy(A->mapping); CHKERRQ(ierr); 821 } 822 PLogObjectDestroy(A); 823 PetscHeaderDestroy(A); 824 return 0; 825 } 826 827 #undef __FUNC__ 828 #define __FUNC__ "MatSetOption_SeqBAIJ" /* ADIC Ignore */ 829 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 830 { 831 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 832 if (op == MAT_ROW_ORIENTED) a->roworiented = 1; 833 else if (op == MAT_COLUMN_ORIENTED) a->roworiented = 0; 834 else if (op == MAT_COLUMNS_SORTED) a->sorted = 1; 835 else if (op == MAT_COLUMNS_UNSORTED) a->sorted = 0; 836 else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 837 else if (op == MAT_NEW_NONZERO_LOCATION_ERROR) a->nonew = -1; 838 else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 839 else if (op == MAT_ROWS_SORTED || 840 op == MAT_ROWS_UNSORTED || 841 op == MAT_SYMMETRIC || 842 op == MAT_STRUCTURALLY_SYMMETRIC || 843 op == MAT_YES_NEW_DIAGONALS || 844 op == MAT_IGNORE_OFF_PROC_ENTRIES) 845 PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 846 else if (op == MAT_NO_NEW_DIAGONALS) 847 {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");} 848 else 849 {SETERRQ(PETSC_ERR_SUP,0,"unknown option");} 850 return 0; 851 } 852 853 854 /* -------------------------------------------------------*/ 855 /* Should check that shapes of vectors and matrices match */ 856 /* -------------------------------------------------------*/ 857 #include "pinclude/plapack.h" 858 859 #undef __FUNC__ 860 #define __FUNC__ "MatMult_SeqBAIJ_1" 861 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz) 862 { 863 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 864 register Scalar *x,*z,*v,sum; 865 int mbs=a->mbs,i,*idx,*ii,n; 866 867 VecGetArray_Fast(xx,x); 868 VecGetArray_Fast(zz,z); 869 870 idx = a->j; 871 v = a->a; 872 ii = a->i; 873 874 for ( i=0; i<mbs; i++ ) { 875 n = ii[1] - ii[0]; ii++; 876 sum = 0.0; 877 while (n--) sum += *v++ * x[*idx++]; 878 z[i] = sum; 879 } 880 VecRestoreArray_Fast(xx,x); 881 VecRestoreArray_Fast(zz,z); 882 PLogFlops(2*a->nz - a->m); 883 return 0; 884 } 885 886 #undef __FUNC__ 887 #define __FUNC__ "MatMult_SeqBAIJ_2" 888 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz) 889 { 890 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 891 register Scalar *x,*z,*v,*xb,sum1,sum2; 892 register Scalar x1,x2; 893 int mbs=a->mbs,i,*idx,*ii; 894 int j,n; 895 896 VecGetArray_Fast(xx,x); 897 VecGetArray_Fast(zz,z); 898 899 idx = a->j; 900 v = a->a; 901 ii = a->i; 902 903 for ( i=0; i<mbs; i++ ) { 904 n = ii[1] - ii[0]; ii++; 905 sum1 = 0.0; sum2 = 0.0; 906 for ( j=0; j<n; j++ ) { 907 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 908 sum1 += v[0]*x1 + v[2]*x2; 909 sum2 += v[1]*x1 + v[3]*x2; 910 v += 4; 911 } 912 z[0] = sum1; z[1] = sum2; 913 z += 2; 914 } 915 VecRestoreArray_Fast(xx,x); 916 VecRestoreArray_Fast(zz,z); 917 PLogFlops(4*a->nz - a->m); 918 return 0; 919 } 920 921 #undef __FUNC__ 922 #define __FUNC__ "MatMult_SeqBAIJ_3" 923 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz) 924 { 925 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 926 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 927 int mbs=a->mbs,i,*idx,*ii,j,n; 928 929 VecGetArray_Fast(xx,x); 930 VecGetArray_Fast(zz,z); 931 932 idx = a->j; 933 v = a->a; 934 ii = a->i; 935 936 for ( i=0; i<mbs; i++ ) { 937 n = ii[1] - ii[0]; ii++; 938 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 939 for ( j=0; j<n; j++ ) { 940 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 941 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 942 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 943 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 944 v += 9; 945 } 946 z[0] = sum1; z[1] = sum2; z[2] = sum3; 947 z += 3; 948 } 949 VecRestoreArray_Fast(xx,x); 950 VecRestoreArray_Fast(zz,z); 951 PLogFlops(18*a->nz - a->m); 952 return 0; 953 } 954 955 #undef __FUNC__ 956 #define __FUNC__ "MatMult_SeqBAIJ_4" 957 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz) 958 { 959 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 960 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4; 961 register Scalar x1,x2,x3,x4; 962 int mbs=a->mbs,i,*idx,*ii; 963 int j,n; 964 965 VecGetArray_Fast(xx,x); 966 VecGetArray_Fast(zz,z); 967 968 idx = a->j; 969 v = a->a; 970 ii = a->i; 971 972 for ( i=0; i<mbs; i++ ) { 973 n = ii[1] - ii[0]; ii++; 974 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 975 for ( j=0; j<n; j++ ) { 976 xb = x + 4*(*idx++); 977 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 978 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 979 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 980 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 981 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 982 v += 16; 983 } 984 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 985 z += 4; 986 } 987 VecRestoreArray_Fast(xx,x); 988 VecRestoreArray_Fast(zz,z); 989 PLogFlops(32*a->nz - a->m); 990 return 0; 991 } 992 993 #undef __FUNC__ 994 #define __FUNC__ "MatMult_SeqBAIJ_5" 995 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz) 996 { 997 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 998 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 999 register Scalar x1,x2,x3,x4,x5; 1000 int mbs=a->mbs,i,*idx,*ii,j,n; 1001 1002 VecGetArray_Fast(xx,x); 1003 VecGetArray_Fast(zz,z); 1004 1005 idx = a->j; 1006 v = a->a; 1007 ii = a->i; 1008 1009 for ( i=0; i<mbs; i++ ) { 1010 n = ii[1] - ii[0]; ii++; 1011 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 1012 for ( j=0; j<n; j++ ) { 1013 xb = x + 5*(*idx++); 1014 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 1015 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1016 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1017 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1018 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1019 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 1020 v += 25; 1021 } 1022 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 1023 z += 5; 1024 } 1025 VecRestoreArray_Fast(xx,x); 1026 VecRestoreArray_Fast(zz,z); 1027 PLogFlops(50*a->nz - a->m); 1028 return 0; 1029 } 1030 1031 #undef __FUNC__ 1032 #define __FUNC__ "MatMult_SeqBAIJ_7" 1033 static int MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz) 1034 { 1035 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1036 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 1037 register Scalar x1,x2,x3,x4,x5,x6,x7; 1038 int mbs=a->mbs,i,*idx,*ii,j,n; 1039 1040 VecGetArray_Fast(xx,x); 1041 VecGetArray_Fast(zz,z); 1042 1043 idx = a->j; 1044 v = a->a; 1045 ii = a->i; 1046 1047 for ( i=0; i<mbs; i++ ) { 1048 n = ii[1] - ii[0]; ii++; 1049 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0; 1050 for ( j=0; j<n; j++ ) { 1051 xb = x + 7*(*idx++); 1052 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 1053 sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 1054 sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 1055 sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 1056 sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 1057 sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 1058 sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 1059 sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 1060 v += 49; 1061 } 1062 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 1063 z += 7; 1064 } 1065 1066 VecRestoreArray_Fast(xx,x); 1067 VecRestoreArray_Fast(zz,z); 1068 PLogFlops(98*a->nz - a->m); 1069 return 0; 1070 } 1071 1072 #undef __FUNC__ 1073 #define __FUNC__ "MatMult_SeqBAIJ_N" 1074 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz) 1075 { 1076 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1077 register Scalar *x,*z,*v,*xb; 1078 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2; 1079 int _One = 1,ncols,k; 1080 Scalar _DOne = 1.0,*work,*workt,_DZero = 0.0; 1081 1082 VecGetArray_Fast(xx,x); 1083 VecGetArray_Fast(zz,z); 1084 1085 idx = a->j; 1086 v = a->a; 1087 ii = a->i; 1088 1089 1090 if (!a->mult_work) { 1091 k = PetscMax(a->m,a->n); 1092 a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work); 1093 } 1094 work = a->mult_work; 1095 for ( i=0; i<mbs; i++ ) { 1096 n = ii[1] - ii[0]; ii++; 1097 ncols = n*bs; 1098 workt = work; 1099 for ( j=0; j<n; j++ ) { 1100 xb = x + bs*(*idx++); 1101 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1102 workt += bs; 1103 } 1104 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); 1105 v += n*bs2; 1106 z += bs; 1107 } 1108 VecRestoreArray_Fast(xx,x); 1109 VecRestoreArray_Fast(zz,z); 1110 PLogFlops(2*a->nz*bs2 - a->m); 1111 return 0; 1112 } 1113 1114 #undef __FUNC__ 1115 #define __FUNC__ "MatMultAdd_SeqBAIJ_1" 1116 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 1117 { 1118 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1119 register Scalar *x,*y,*z,*v,sum; 1120 int mbs=a->mbs,i,*idx,*ii,n; 1121 1122 VecGetArray_Fast(xx,x); 1123 VecGetArray_Fast(yy,y); 1124 VecGetArray_Fast(zz,z); 1125 1126 idx = a->j; 1127 v = a->a; 1128 ii = a->i; 1129 1130 for ( i=0; i<mbs; i++ ) { 1131 n = ii[1] - ii[0]; ii++; 1132 sum = y[i]; 1133 while (n--) sum += *v++ * x[*idx++]; 1134 z[i] = sum; 1135 } 1136 VecRestoreArray_Fast(xx,x); 1137 VecRestoreArray_Fast(yy,y); 1138 VecRestoreArray_Fast(zz,z); 1139 PLogFlops(2*a->nz); 1140 return 0; 1141 } 1142 1143 #undef __FUNC__ 1144 #define __FUNC__ "MatMultAdd_SeqBAIJ_2" 1145 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 1146 { 1147 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1148 register Scalar *x,*y,*z,*v,*xb,sum1,sum2; 1149 register Scalar x1,x2; 1150 int mbs=a->mbs,i,*idx,*ii; 1151 int j,n; 1152 1153 VecGetArray_Fast(xx,x); 1154 VecGetArray_Fast(yy,y); 1155 VecGetArray_Fast(zz,z); 1156 1157 idx = a->j; 1158 v = a->a; 1159 ii = a->i; 1160 1161 for ( i=0; i<mbs; i++ ) { 1162 n = ii[1] - ii[0]; ii++; 1163 sum1 = y[0]; sum2 = y[1]; 1164 for ( j=0; j<n; j++ ) { 1165 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 1166 sum1 += v[0]*x1 + v[2]*x2; 1167 sum2 += v[1]*x1 + v[3]*x2; 1168 v += 4; 1169 } 1170 z[0] = sum1; z[1] = sum2; 1171 z += 2; y += 2; 1172 } 1173 VecRestoreArray_Fast(xx,x); 1174 VecRestoreArray_Fast(yy,y); 1175 VecRestoreArray_Fast(zz,z); 1176 PLogFlops(4*a->nz); 1177 return 0; 1178 } 1179 1180 #undef __FUNC__ 1181 #define __FUNC__ "MatMultAdd_SeqBAIJ_3" 1182 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 1183 { 1184 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1185 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 1186 int mbs=a->mbs,i,*idx,*ii,j,n; 1187 1188 VecGetArray_Fast(xx,x); 1189 VecGetArray_Fast(yy,y); 1190 VecGetArray_Fast(zz,z); 1191 1192 idx = a->j; 1193 v = a->a; 1194 ii = a->i; 1195 1196 for ( i=0; i<mbs; i++ ) { 1197 n = ii[1] - ii[0]; ii++; 1198 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; 1199 for ( j=0; j<n; j++ ) { 1200 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1201 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 1202 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 1203 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 1204 v += 9; 1205 } 1206 z[0] = sum1; z[1] = sum2; z[2] = sum3; 1207 z += 3; y += 3; 1208 } 1209 VecRestoreArray_Fast(xx,x); 1210 VecRestoreArray_Fast(yy,y); 1211 VecRestoreArray_Fast(zz,z); 1212 PLogFlops(18*a->nz); 1213 return 0; 1214 } 1215 1216 #undef __FUNC__ 1217 #define __FUNC__ "MatMultAdd_SeqBAIJ_4" 1218 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 1219 { 1220 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1221 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4; 1222 register Scalar x1,x2,x3,x4; 1223 int mbs=a->mbs,i,*idx,*ii; 1224 int j,n; 1225 1226 VecGetArray_Fast(xx,x); 1227 VecGetArray_Fast(yy,y); 1228 VecGetArray_Fast(zz,z); 1229 1230 idx = a->j; 1231 v = a->a; 1232 ii = a->i; 1233 1234 for ( i=0; i<mbs; i++ ) { 1235 n = ii[1] - ii[0]; ii++; 1236 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; 1237 for ( j=0; j<n; j++ ) { 1238 xb = x + 4*(*idx++); 1239 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1240 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 1241 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 1242 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 1243 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 1244 v += 16; 1245 } 1246 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 1247 z += 4; y += 4; 1248 } 1249 VecRestoreArray_Fast(xx,x); 1250 VecRestoreArray_Fast(yy,y); 1251 VecRestoreArray_Fast(zz,z); 1252 PLogFlops(32*a->nz); 1253 return 0; 1254 } 1255 1256 #undef __FUNC__ 1257 #define __FUNC__ "MatMultAdd_SeqBAIJ_5" 1258 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 1259 { 1260 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1261 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 1262 register Scalar x1,x2,x3,x4,x5; 1263 int mbs=a->mbs,i,*idx,*ii,j,n; 1264 1265 VecGetArray_Fast(xx,x); 1266 VecGetArray_Fast(yy,y); 1267 VecGetArray_Fast(zz,z); 1268 1269 idx = a->j; 1270 v = a->a; 1271 ii = a->i; 1272 1273 for ( i=0; i<mbs; i++ ) { 1274 n = ii[1] - ii[0]; ii++; 1275 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; 1276 for ( j=0; j<n; j++ ) { 1277 xb = x + 5*(*idx++); 1278 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 1279 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1280 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1281 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1282 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1283 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 1284 v += 25; 1285 } 1286 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 1287 z += 5; y += 5; 1288 } 1289 VecRestoreArray_Fast(xx,x); 1290 VecRestoreArray_Fast(yy,y); 1291 VecRestoreArray_Fast(zz,z); 1292 PLogFlops(50*a->nz); 1293 return 0; 1294 } 1295 1296 #undef __FUNC__ 1297 #define __FUNC__ "MatMultAdd_SeqBAIJ_7" 1298 static int MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1299 { 1300 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1301 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 1302 register Scalar x1,x2,x3,x4,x5,x6,x7; 1303 int mbs=a->mbs,i,*idx,*ii,j,n; 1304 1305 VecGetArray_Fast(xx,x); 1306 VecGetArray_Fast(yy,y); 1307 VecGetArray_Fast(zz,z); 1308 1309 idx = a->j; 1310 v = a->a; 1311 ii = a->i; 1312 1313 for ( i=0; i<mbs; i++ ) { 1314 n = ii[1] - ii[0]; ii++; 1315 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6]; 1316 for ( j=0; j<n; j++ ) { 1317 xb = x + 7*(*idx++); 1318 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 1319 sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 1320 sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 1321 sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 1322 sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 1323 sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 1324 sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 1325 sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 1326 v += 49; 1327 } 1328 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 1329 z += 7; y += 7; 1330 } 1331 VecRestoreArray_Fast(xx,x); 1332 VecRestoreArray_Fast(yy,y); 1333 VecRestoreArray_Fast(zz,z); 1334 PLogFlops(98*a->nz); 1335 return 0; 1336 } 1337 1338 1339 #undef __FUNC__ 1340 #define __FUNC__ "MatMultAdd_SeqBAIJ_N" 1341 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 1342 { 1343 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1344 register Scalar *x,*z,*v,*xb; 1345 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr; 1346 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1347 1348 if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1349 1350 VecGetArray_Fast(xx,x); 1351 VecGetArray_Fast(zz,z); 1352 1353 idx = a->j; 1354 v = a->a; 1355 ii = a->i; 1356 1357 1358 if (!a->mult_work) { 1359 k = PetscMax(a->m,a->n); 1360 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work); 1361 } 1362 work = a->mult_work; 1363 for ( i=0; i<mbs; i++ ) { 1364 n = ii[1] - ii[0]; ii++; 1365 ncols = n*bs; 1366 workt = work; 1367 for ( j=0; j<n; j++ ) { 1368 xb = x + bs*(*idx++); 1369 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1370 workt += bs; 1371 } 1372 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); 1373 v += n*bs2; 1374 z += bs; 1375 } 1376 VecRestoreArray_Fast(xx,x); 1377 VecRestoreArray_Fast(zz,z); 1378 PLogFlops(2*a->nz*bs2 ); 1379 return 0; 1380 } 1381 1382 #undef __FUNC__ 1383 #define __FUNC__ "MatMultTrans_SeqBAIJ" 1384 int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz) 1385 { 1386 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1387 Scalar *xg,*zg,*zb; 1388 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1389 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1390 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1391 1392 1393 VecGetArray_Fast(xx,xg); x = xg; 1394 VecGetArray_Fast(zz,zg); z = zg; 1395 PetscMemzero(z,N*sizeof(Scalar)); 1396 1397 idx = a->j; 1398 v = a->a; 1399 ii = a->i; 1400 1401 switch (bs) { 1402 case 1: 1403 for ( i=0; i<mbs; i++ ) { 1404 n = ii[1] - ii[0]; ii++; 1405 xb = x + i; x1 = xb[0]; 1406 ib = idx + ai[i]; 1407 for ( j=0; j<n; j++ ) { 1408 rval = ib[j]; 1409 z[rval] += *v++ * x1; 1410 } 1411 } 1412 break; 1413 case 2: 1414 for ( i=0; i<mbs; i++ ) { 1415 n = ii[1] - ii[0]; ii++; 1416 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1417 ib = idx + ai[i]; 1418 for ( j=0; j<n; j++ ) { 1419 rval = ib[j]*2; 1420 z[rval++] += v[0]*x1 + v[1]*x2; 1421 z[rval++] += v[2]*x1 + v[3]*x2; 1422 v += 4; 1423 } 1424 } 1425 break; 1426 case 3: 1427 for ( i=0; i<mbs; i++ ) { 1428 n = ii[1] - ii[0]; ii++; 1429 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1430 ib = idx + ai[i]; 1431 for ( j=0; j<n; j++ ) { 1432 rval = ib[j]*3; 1433 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1434 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1435 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1436 v += 9; 1437 } 1438 } 1439 break; 1440 case 4: 1441 for ( i=0; i<mbs; i++ ) { 1442 n = ii[1] - ii[0]; ii++; 1443 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1444 ib = idx + ai[i]; 1445 for ( j=0; j<n; j++ ) { 1446 rval = ib[j]*4; 1447 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1448 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1449 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1450 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1451 v += 16; 1452 } 1453 } 1454 break; 1455 case 5: 1456 for ( i=0; i<mbs; i++ ) { 1457 n = ii[1] - ii[0]; ii++; 1458 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1459 x4 = xb[3]; x5 = xb[4]; 1460 ib = idx + ai[i]; 1461 for ( j=0; j<n; j++ ) { 1462 rval = ib[j]*5; 1463 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1464 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1465 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1466 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1467 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1468 v += 25; 1469 } 1470 } 1471 break; 1472 /* block sizes larger then 5 by 5 are handled by BLAS */ 1473 default: { 1474 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1475 if (!a->mult_work) { 1476 k = PetscMax(a->m,a->n); 1477 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1478 CHKPTRQ(a->mult_work); 1479 } 1480 work = a->mult_work; 1481 for ( i=0; i<mbs; i++ ) { 1482 n = ii[1] - ii[0]; ii++; 1483 ncols = n*bs; 1484 PetscMemzero(work,ncols*sizeof(Scalar)); 1485 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1486 v += n*bs2; 1487 x += bs; 1488 workt = work; 1489 for ( j=0; j<n; j++ ) { 1490 zb = z + bs*(*idx++); 1491 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1492 workt += bs; 1493 } 1494 } 1495 } 1496 } 1497 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1498 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1499 PLogFlops(2*a->nz*a->bs2 - a->n); 1500 return 0; 1501 } 1502 1503 #undef __FUNC__ 1504 #define __FUNC__ "MatMultTransAdd_SeqBAIJ" 1505 int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1506 { 1507 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1508 Scalar *xg,*zg,*zb; 1509 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1510 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1511 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1512 1513 1514 1515 VecGetArray_Fast(xx,xg); x = xg; 1516 VecGetArray_Fast(zz,zg); z = zg; 1517 1518 if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1519 else PetscMemzero(z,N*sizeof(Scalar)); 1520 1521 1522 idx = a->j; 1523 v = a->a; 1524 ii = a->i; 1525 1526 switch (bs) { 1527 case 1: 1528 for ( i=0; i<mbs; i++ ) { 1529 n = ii[1] - ii[0]; ii++; 1530 xb = x + i; x1 = xb[0]; 1531 ib = idx + ai[i]; 1532 for ( j=0; j<n; j++ ) { 1533 rval = ib[j]; 1534 z[rval] += *v++ * x1; 1535 } 1536 } 1537 break; 1538 case 2: 1539 for ( i=0; i<mbs; i++ ) { 1540 n = ii[1] - ii[0]; ii++; 1541 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1542 ib = idx + ai[i]; 1543 for ( j=0; j<n; j++ ) { 1544 rval = ib[j]*2; 1545 z[rval++] += v[0]*x1 + v[1]*x2; 1546 z[rval++] += v[2]*x1 + v[3]*x2; 1547 v += 4; 1548 } 1549 } 1550 break; 1551 case 3: 1552 for ( i=0; i<mbs; i++ ) { 1553 n = ii[1] - ii[0]; ii++; 1554 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1555 ib = idx + ai[i]; 1556 for ( j=0; j<n; j++ ) { 1557 rval = ib[j]*3; 1558 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1559 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1560 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1561 v += 9; 1562 } 1563 } 1564 break; 1565 case 4: 1566 for ( i=0; i<mbs; i++ ) { 1567 n = ii[1] - ii[0]; ii++; 1568 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1569 ib = idx + ai[i]; 1570 for ( j=0; j<n; j++ ) { 1571 rval = ib[j]*4; 1572 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1573 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1574 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1575 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1576 v += 16; 1577 } 1578 } 1579 break; 1580 case 5: 1581 for ( i=0; i<mbs; i++ ) { 1582 n = ii[1] - ii[0]; ii++; 1583 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1584 x4 = xb[3]; x5 = xb[4]; 1585 ib = idx + ai[i]; 1586 for ( j=0; j<n; j++ ) { 1587 rval = ib[j]*5; 1588 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1589 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1590 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1591 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1592 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1593 v += 25; 1594 } 1595 } 1596 break; 1597 /* block sizes larger then 5 by 5 are handled by BLAS */ 1598 default: { 1599 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1600 if (!a->mult_work) { 1601 k = PetscMax(a->m,a->n); 1602 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1603 CHKPTRQ(a->mult_work); 1604 } 1605 work = a->mult_work; 1606 for ( i=0; i<mbs; i++ ) { 1607 n = ii[1] - ii[0]; ii++; 1608 ncols = n*bs; 1609 PetscMemzero(work,ncols*sizeof(Scalar)); 1610 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1611 v += n*bs2; 1612 x += bs; 1613 workt = work; 1614 for ( j=0; j<n; j++ ) { 1615 zb = z + bs*(*idx++); 1616 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1617 workt += bs; 1618 } 1619 } 1620 } 1621 } 1622 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1623 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1624 PLogFlops(2*a->nz*a->bs2); 1625 return 0; 1626 } 1627 1628 #undef __FUNC__ 1629 #define __FUNC__ "MatGetInfo_SeqBAIJ" /* ADIC Ignore */ 1630 int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info) 1631 { 1632 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1633 1634 info->rows_global = (double)a->m; 1635 info->columns_global = (double)a->n; 1636 info->rows_local = (double)a->m; 1637 info->columns_local = (double)a->n; 1638 info->block_size = a->bs2; 1639 info->nz_allocated = a->maxnz; 1640 info->nz_used = a->bs2*a->nz; 1641 info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 1642 /* if (info->nz_unneeded != A->info.nz_unneeded) 1643 printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */ 1644 info->assemblies = A->num_ass; 1645 info->mallocs = a->reallocs; 1646 info->memory = A->mem; 1647 if (A->factor) { 1648 info->fill_ratio_given = A->info.fill_ratio_given; 1649 info->fill_ratio_needed = A->info.fill_ratio_needed; 1650 info->factor_mallocs = A->info.factor_mallocs; 1651 } else { 1652 info->fill_ratio_given = 0; 1653 info->fill_ratio_needed = 0; 1654 info->factor_mallocs = 0; 1655 } 1656 return 0; 1657 } 1658 1659 #undef __FUNC__ 1660 #define __FUNC__ "MatEqual_SeqBAIJ" 1661 int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 1662 { 1663 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 1664 1665 if (B->type !=MATSEQBAIJ)SETERRQ(1,0,"Matrices must be same type"); 1666 1667 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1668 if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 1669 (a->nz != b->nz)) { 1670 *flg = PETSC_FALSE; return 0; 1671 } 1672 1673 /* if the a->i are the same */ 1674 if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 1675 *flg = PETSC_FALSE; return 0; 1676 } 1677 1678 /* if a->j are the same */ 1679 if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 1680 *flg = PETSC_FALSE; return 0; 1681 } 1682 1683 /* if a->a are the same */ 1684 if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 1685 *flg = PETSC_FALSE; return 0; 1686 } 1687 *flg = PETSC_TRUE; 1688 return 0; 1689 1690 } 1691 1692 #undef __FUNC__ 1693 #define __FUNC__ "MatGetDiagonal_SeqBAIJ" 1694 int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 1695 { 1696 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1697 int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 1698 Scalar *x, zero = 0.0,*aa,*aa_j; 1699 1700 if (A->factor) SETERRQ(1,0,"Not for factored matrix"); 1701 bs = a->bs; 1702 aa = a->a; 1703 ai = a->i; 1704 aj = a->j; 1705 ambs = a->mbs; 1706 bs2 = a->bs2; 1707 1708 VecSet(&zero,v); 1709 VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n); 1710 if (n != a->m) SETERRQ(1,0,"Nonconforming matrix and vector"); 1711 for ( i=0; i<ambs; i++ ) { 1712 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1713 if (aj[j] == i) { 1714 row = i*bs; 1715 aa_j = aa+j*bs2; 1716 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 1717 break; 1718 } 1719 } 1720 } 1721 return 0; 1722 } 1723 1724 #undef __FUNC__ 1725 #define __FUNC__ "MatDiagonalScale_SeqBAIJ" 1726 int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 1727 { 1728 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1729 Scalar *l,*r,x,*v,*aa,*li,*ri; 1730 int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 1731 1732 ai = a->i; 1733 aj = a->j; 1734 aa = a->a; 1735 m = a->m; 1736 n = a->n; 1737 bs = a->bs; 1738 mbs = a->mbs; 1739 bs2 = a->bs2; 1740 if (ll) { 1741 VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm); 1742 if (lm != m) SETERRQ(1,0,"Left scaling vector wrong length"); 1743 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1744 M = ai[i+1] - ai[i]; 1745 li = l + i*bs; 1746 v = aa + bs2*ai[i]; 1747 for ( j=0; j<M; j++ ) { /* for each block */ 1748 for ( k=0; k<bs2; k++ ) { 1749 (*v++) *= li[k%bs]; 1750 } 1751 } 1752 } 1753 } 1754 1755 if (rr) { 1756 VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn); 1757 if (rn != n) SETERRQ(1,0,"Right scaling vector wrong length"); 1758 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1759 M = ai[i+1] - ai[i]; 1760 v = aa + bs2*ai[i]; 1761 for ( j=0; j<M; j++ ) { /* for each block */ 1762 ri = r + bs*aj[ai[i]+j]; 1763 for ( k=0; k<bs; k++ ) { 1764 x = ri[k]; 1765 for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 1766 } 1767 } 1768 } 1769 } 1770 return 0; 1771 } 1772 1773 1774 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 1775 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 1776 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1777 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*); 1778 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**); 1779 1780 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 1781 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 1782 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 1783 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 1784 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 1785 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 1786 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec); 1787 1788 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 1789 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 1790 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 1791 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 1792 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 1793 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 1794 1795 #undef __FUNC__ 1796 #define __FUNC__ "MatNorm_SeqBAIJ" 1797 int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 1798 { 1799 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1800 Scalar *v = a->a; 1801 double sum = 0.0; 1802 int i,nz=a->nz,bs2=a->bs2; 1803 1804 if (type == NORM_FROBENIUS) { 1805 for (i=0; i< bs2*nz; i++ ) { 1806 #if defined(PETSC_COMPLEX) 1807 sum += real(conj(*v)*(*v)); v++; 1808 #else 1809 sum += (*v)*(*v); v++; 1810 #endif 1811 } 1812 *norm = sqrt(sum); 1813 } 1814 else { 1815 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 1816 } 1817 return 0; 1818 } 1819 1820 /* 1821 note: This can only work for identity for row and col. It would 1822 be good to check this and otherwise generate an error. 1823 */ 1824 #undef __FUNC__ 1825 #define __FUNC__ "MatILUFactor_SeqBAIJ" 1826 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 1827 { 1828 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1829 Mat outA; 1830 int ierr; 1831 1832 if (fill != 0) SETERRQ(1,0,"Only fill=0 supported"); 1833 1834 outA = inA; 1835 inA->factor = FACTOR_LU; 1836 a->row = row; 1837 a->col = col; 1838 1839 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1840 1841 if (!a->diag) { 1842 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 1843 } 1844 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 1845 return 0; 1846 } 1847 1848 #undef __FUNC__ 1849 #define __FUNC__ "MatScale_SeqBAIJ" 1850 int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 1851 { 1852 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1853 int one = 1, totalnz = a->bs2*a->nz; 1854 BLscal_( &totalnz, alpha, a->a, &one ); 1855 PLogFlops(totalnz); 1856 return 0; 1857 } 1858 1859 #undef __FUNC__ 1860 #define __FUNC__ "MatGetValues_SeqBAIJ" 1861 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 1862 { 1863 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1864 int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 1865 int *ai = a->i, *ailen = a->ilen; 1866 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 1867 Scalar *ap, *aa = a->a, zero = 0.0; 1868 1869 for ( k=0; k<m; k++ ) { /* loop over rows */ 1870 row = im[k]; brow = row/bs; 1871 if (row < 0) SETERRQ(1,0,"Negative row"); 1872 if (row >= a->m) SETERRQ(1,0,"Row too large"); 1873 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1874 nrow = ailen[brow]; 1875 for ( l=0; l<n; l++ ) { /* loop over columns */ 1876 if (in[l] < 0) SETERRQ(1,0,"Negative column"); 1877 if (in[l] >= a->n) SETERRQ(1,0,"Column too large"); 1878 col = in[l] ; 1879 bcol = col/bs; 1880 cidx = col%bs; 1881 ridx = row%bs; 1882 high = nrow; 1883 low = 0; /* assume unsorted */ 1884 while (high-low > 5) { 1885 t = (low+high)/2; 1886 if (rp[t] > bcol) high = t; 1887 else low = t; 1888 } 1889 for ( i=low; i<high; i++ ) { 1890 if (rp[i] > bcol) break; 1891 if (rp[i] == bcol) { 1892 *v++ = ap[bs2*i+bs*cidx+ridx]; 1893 goto finished; 1894 } 1895 } 1896 *v++ = zero; 1897 finished:; 1898 } 1899 } 1900 return 0; 1901 } 1902 1903 #undef __FUNC__ 1904 #define __FUNC__ "MatGetBlockSize_SeqBAIJ" /* ADIC Ignore */ 1905 int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs) 1906 { 1907 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data; 1908 *bs = baij->bs; 1909 return 0; 1910 } 1911 1912 /* idx should be of length atlease bs */ 1913 #undef __FUNC__ 1914 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Block" 1915 static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg) 1916 { 1917 int i,row; 1918 row = idx[0]; 1919 if (row%bs!=0) { *flg = PETSC_FALSE; return 0; } 1920 1921 for ( i=1; i<bs; i++ ) { 1922 if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; } 1923 } 1924 *flg = PETSC_TRUE; 1925 return 0; 1926 } 1927 1928 #undef __FUNC__ 1929 #define __FUNC__ "MatZeroRows_SeqBAIJ" 1930 int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag) 1931 { 1932 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1933 IS is_local; 1934 int ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2; 1935 PetscTruth flg; 1936 Scalar zero = 0.0,*aa; 1937 1938 /* Make a copy of the IS and sort it */ 1939 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 1940 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1941 ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr); 1942 ierr = ISSort(is_local); CHKERRQ(ierr); 1943 ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr); 1944 1945 i = 0; 1946 while (i < is_n) { 1947 if (rows[i]<0 || rows[i]>m) SETERRQ(1,0,"row out of range"); 1948 flg = PETSC_FALSE; 1949 if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); } 1950 if (flg) { /* There exists a block of rows to be Zerowed */ 1951 baij->ilen[rows[i]/bs] = 0; 1952 i += bs; 1953 } else { /* Zero out only the requested row */ 1954 count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs; 1955 aa = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs); 1956 for ( j=0; j<count; j++ ) { 1957 aa[0] = zero; 1958 aa+=bs; 1959 } 1960 i++; 1961 } 1962 } 1963 if (diag) { 1964 for ( j=0; j<is_n; j++ ) { 1965 ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1966 } 1967 } 1968 ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr); 1969 ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr); 1970 ierr = ISDestroy(is_local); CHKERRQ(ierr); 1971 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1972 1973 return 0; 1974 } 1975 1976 #undef __FUNC__ 1977 #define __FUNC__ "MatPrintHelp_SeqBAIJ" /* ADIC Ignore */ 1978 int MatPrintHelp_SeqBAIJ(Mat A) 1979 { 1980 static int called = 0; 1981 MPI_Comm comm = A->comm; 1982 1983 if (called) return 0; else called = 1; 1984 PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n"); 1985 PetscPrintf(comm," -mat_block_size <block_size>\n"); 1986 return 0; 1987 } 1988 1989 /* -------------------------------------------------------------------*/ 1990 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 1991 MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 1992 MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N, 1993 MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ, 1994 MatSolve_SeqBAIJ_N,0, 1995 0,0, 1996 MatLUFactor_SeqBAIJ,0, 1997 0, 1998 MatTranspose_SeqBAIJ, 1999 MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 2000 MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 2001 0,MatAssemblyEnd_SeqBAIJ, 2002 0, 2003 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ, 2004 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 2005 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 2006 MatILUFactorSymbolic_SeqBAIJ,0, 2007 0,0, 2008 MatConvertSameType_SeqBAIJ,0,0, 2009 MatILUFactor_SeqBAIJ,0,0, 2010 MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ, 2011 MatGetValues_SeqBAIJ,0, 2012 MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 2013 0,0,0,MatGetBlockSize_SeqBAIJ, 2014 MatGetRowIJ_SeqBAIJ, 2015 MatRestoreRowIJ_SeqBAIJ, 2016 0,0,0,0,0,0, 2017 MatSetValuesBlocked_SeqBAIJ}; 2018 2019 #undef __FUNC__ 2020 #define __FUNC__ "MatCreateSeqBAIJ" 2021 /*@C 2022 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2023 compressed row) format. For good matrix assembly performance the 2024 user should preallocate the matrix storage by setting the parameter nz 2025 (or the array nzz). By setting these parameters accurately, performance 2026 during matrix assembly can be increased by more than a factor of 50. 2027 2028 Input Parameters: 2029 . comm - MPI communicator, set to MPI_COMM_SELF 2030 . bs - size of block 2031 . m - number of rows 2032 . n - number of columns 2033 . nz - number of block nonzeros per block row (same for all rows) 2034 . nzz - array containing the number of block nonzeros in the various block rows 2035 (possibly different for each block row) or PETSC_NULL 2036 2037 Output Parameter: 2038 . A - the matrix 2039 2040 Options Database Keys: 2041 $ -mat_no_unroll - uses code that does not unroll the loops in the 2042 $ block calculations (much solver) 2043 $ -mat_block_size - size of the blocks to use 2044 2045 Notes: 2046 The block AIJ format is fully compatible with standard Fortran 77 2047 storage. That is, the stored row and column indices can begin at 2048 either one (as in Fortran) or zero. See the users' manual for details. 2049 2050 Specify the preallocated storage with either nz or nnz (not both). 2051 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2052 allocation. For additional details, see the users manual chapter on 2053 matrices. 2054 2055 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2056 @*/ 2057 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 2058 { 2059 Mat B; 2060 Mat_SeqBAIJ *b; 2061 int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size; 2062 2063 MPI_Comm_size(comm,&size); 2064 if (size > 1) SETERRQ(1,0,"Comm must be of size 1"); 2065 2066 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr); 2067 2068 if (mbs*bs!=m || nbs*bs!=n) 2069 SETERRQ(1,0,"Number rows, cols must be divisible by blocksize"); 2070 2071 *A = 0; 2072 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 2073 PLogObjectCreate(B); 2074 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 2075 PetscMemzero(b,sizeof(Mat_SeqBAIJ)); 2076 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 2077 ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr); 2078 if (!flg) { 2079 switch (bs) { 2080 case 1: 2081 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2082 B->ops.solve = MatSolve_SeqBAIJ_1; 2083 B->ops.mult = MatMult_SeqBAIJ_1; 2084 B->ops.multadd = MatMultAdd_SeqBAIJ_1; 2085 break; 2086 case 2: 2087 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2088 B->ops.solve = MatSolve_SeqBAIJ_2; 2089 B->ops.mult = MatMult_SeqBAIJ_2; 2090 B->ops.multadd = MatMultAdd_SeqBAIJ_2; 2091 break; 2092 case 3: 2093 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2094 B->ops.solve = MatSolve_SeqBAIJ_3; 2095 B->ops.mult = MatMult_SeqBAIJ_3; 2096 B->ops.multadd = MatMultAdd_SeqBAIJ_3; 2097 break; 2098 case 4: 2099 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2100 B->ops.solve = MatSolve_SeqBAIJ_4; 2101 B->ops.mult = MatMult_SeqBAIJ_4; 2102 B->ops.multadd = MatMultAdd_SeqBAIJ_4; 2103 break; 2104 case 5: 2105 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2106 B->ops.solve = MatSolve_SeqBAIJ_5; 2107 B->ops.mult = MatMult_SeqBAIJ_5; 2108 B->ops.multadd = MatMultAdd_SeqBAIJ_5; 2109 break; 2110 case 7: 2111 B->ops.mult = MatMult_SeqBAIJ_7; 2112 B->ops.solve = MatSolve_SeqBAIJ_7; 2113 B->ops.multadd = MatMultAdd_SeqBAIJ_7; 2114 break; 2115 } 2116 } 2117 B->destroy = MatDestroy_SeqBAIJ; 2118 B->view = MatView_SeqBAIJ; 2119 B->factor = 0; 2120 B->lupivotthreshold = 1.0; 2121 B->mapping = 0; 2122 b->row = 0; 2123 b->col = 0; 2124 b->reallocs = 0; 2125 2126 b->m = m; B->m = m; B->M = m; 2127 b->n = n; B->n = n; B->N = n; 2128 b->mbs = mbs; 2129 b->nbs = nbs; 2130 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 2131 if (nnz == PETSC_NULL) { 2132 if (nz == PETSC_DEFAULT) nz = 5; 2133 else if (nz <= 0) nz = 1; 2134 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 2135 nz = nz*mbs; 2136 } 2137 else { 2138 nz = 0; 2139 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 2140 } 2141 2142 /* allocate the matrix space */ 2143 len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 2144 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 2145 PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 2146 b->j = (int *) (b->a + nz*bs2); 2147 PetscMemzero(b->j,nz*sizeof(int)); 2148 b->i = b->j + nz; 2149 b->singlemalloc = 1; 2150 2151 b->i[0] = 0; 2152 for (i=1; i<mbs+1; i++) { 2153 b->i[i] = b->i[i-1] + b->imax[i-1]; 2154 } 2155 2156 /* b->ilen will count nonzeros in each block row so far. */ 2157 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 2158 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 2159 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 2160 2161 b->bs = bs; 2162 b->bs2 = bs2; 2163 b->mbs = mbs; 2164 b->nz = 0; 2165 b->maxnz = nz*bs2; 2166 b->sorted = 0; 2167 b->roworiented = 1; 2168 b->nonew = 0; 2169 b->diag = 0; 2170 b->solve_work = 0; 2171 b->mult_work = 0; 2172 b->spptr = 0; 2173 B->info.nz_unneeded = (double)b->maxnz; 2174 2175 *A = B; 2176 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 2177 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 2178 return 0; 2179 } 2180 2181 #undef __FUNC__ 2182 #define __FUNC__ "MatConvertSameType_SeqBAIJ" 2183 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 2184 { 2185 Mat C; 2186 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 2187 int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 2188 2189 if (a->i[mbs] != nz) SETERRQ(1,0,"Corrupt matrix"); 2190 2191 *B = 0; 2192 PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 2193 PLogObjectCreate(C); 2194 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 2195 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 2196 C->destroy = MatDestroy_SeqBAIJ; 2197 C->view = MatView_SeqBAIJ; 2198 C->factor = A->factor; 2199 c->row = 0; 2200 c->col = 0; 2201 C->assembled = PETSC_TRUE; 2202 2203 c->m = C->m = a->m; 2204 c->n = C->n = a->n; 2205 C->M = a->m; 2206 C->N = a->n; 2207 2208 c->bs = a->bs; 2209 c->bs2 = a->bs2; 2210 c->mbs = a->mbs; 2211 c->nbs = a->nbs; 2212 2213 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 2214 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 2215 for ( i=0; i<mbs; i++ ) { 2216 c->imax[i] = a->imax[i]; 2217 c->ilen[i] = a->ilen[i]; 2218 } 2219 2220 /* allocate the matrix space */ 2221 c->singlemalloc = 1; 2222 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 2223 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 2224 c->j = (int *) (c->a + nz*bs2); 2225 c->i = c->j + nz; 2226 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 2227 if (mbs > 0) { 2228 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 2229 if (cpvalues == COPY_VALUES) { 2230 PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 2231 } 2232 } 2233 2234 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 2235 c->sorted = a->sorted; 2236 c->roworiented = a->roworiented; 2237 c->nonew = a->nonew; 2238 2239 if (a->diag) { 2240 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 2241 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 2242 for ( i=0; i<mbs; i++ ) { 2243 c->diag[i] = a->diag[i]; 2244 } 2245 } 2246 else c->diag = 0; 2247 c->nz = a->nz; 2248 c->maxnz = a->maxnz; 2249 c->solve_work = 0; 2250 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 2251 c->mult_work = 0; 2252 *B = C; 2253 return 0; 2254 } 2255 2256 #undef __FUNC__ 2257 #define __FUNC__ "MatLoad_SeqBAIJ" 2258 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 2259 { 2260 Mat_SeqBAIJ *a; 2261 Mat B; 2262 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 2263 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2264 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 2265 int *masked, nmask,tmp,bs2,ishift; 2266 Scalar *aa; 2267 MPI_Comm comm = ((PetscObject) viewer)->comm; 2268 2269 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2270 bs2 = bs*bs; 2271 2272 MPI_Comm_size(comm,&size); 2273 if (size > 1) SETERRQ(1,0,"view must have one processor"); 2274 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2275 ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 2276 if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not Mat object"); 2277 M = header[1]; N = header[2]; nz = header[3]; 2278 2279 if (M != N) SETERRQ(1,0,"Can only do square matrices"); 2280 2281 /* 2282 This code adds extra rows to make sure the number of rows is 2283 divisible by the blocksize 2284 */ 2285 mbs = M/bs; 2286 extra_rows = bs - M + bs*(mbs); 2287 if (extra_rows == bs) extra_rows = 0; 2288 else mbs++; 2289 if (extra_rows) { 2290 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 2291 } 2292 2293 /* read in row lengths */ 2294 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 2295 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 2296 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 2297 2298 /* read in column indices */ 2299 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 2300 ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 2301 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 2302 2303 /* loop over row lengths determining block row lengths */ 2304 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 2305 PetscMemzero(browlengths,mbs*sizeof(int)); 2306 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 2307 PetscMemzero(mask,mbs*sizeof(int)); 2308 masked = mask + mbs; 2309 rowcount = 0; nzcount = 0; 2310 for ( i=0; i<mbs; i++ ) { 2311 nmask = 0; 2312 for ( j=0; j<bs; j++ ) { 2313 kmax = rowlengths[rowcount]; 2314 for ( k=0; k<kmax; k++ ) { 2315 tmp = jj[nzcount++]/bs; 2316 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2317 } 2318 rowcount++; 2319 } 2320 browlengths[i] += nmask; 2321 /* zero out the mask elements we set */ 2322 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2323 } 2324 2325 /* create our matrix */ 2326 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 2327 CHKERRQ(ierr); 2328 B = *A; 2329 a = (Mat_SeqBAIJ *) B->data; 2330 2331 /* set matrix "i" values */ 2332 a->i[0] = 0; 2333 for ( i=1; i<= mbs; i++ ) { 2334 a->i[i] = a->i[i-1] + browlengths[i-1]; 2335 a->ilen[i-1] = browlengths[i-1]; 2336 } 2337 a->nz = 0; 2338 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 2339 2340 /* read in nonzero values */ 2341 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 2342 ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 2343 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 2344 2345 /* set "a" and "j" values into matrix */ 2346 nzcount = 0; jcount = 0; 2347 for ( i=0; i<mbs; i++ ) { 2348 nzcountb = nzcount; 2349 nmask = 0; 2350 for ( j=0; j<bs; j++ ) { 2351 kmax = rowlengths[i*bs+j]; 2352 for ( k=0; k<kmax; k++ ) { 2353 tmp = jj[nzcount++]/bs; 2354 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2355 } 2356 rowcount++; 2357 } 2358 /* sort the masked values */ 2359 PetscSortInt(nmask,masked); 2360 2361 /* set "j" values into matrix */ 2362 maskcount = 1; 2363 for ( j=0; j<nmask; j++ ) { 2364 a->j[jcount++] = masked[j]; 2365 mask[masked[j]] = maskcount++; 2366 } 2367 /* set "a" values into matrix */ 2368 ishift = bs2*a->i[i]; 2369 for ( j=0; j<bs; j++ ) { 2370 kmax = rowlengths[i*bs+j]; 2371 for ( k=0; k<kmax; k++ ) { 2372 tmp = jj[nzcountb]/bs ; 2373 block = mask[tmp] - 1; 2374 point = jj[nzcountb] - bs*tmp; 2375 idx = ishift + bs2*block + j + bs*point; 2376 a->a[idx] = aa[nzcountb++]; 2377 } 2378 } 2379 /* zero out the mask elements we set */ 2380 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2381 } 2382 if (jcount != a->nz) SETERRQ(1,0,"Error bad binary matrix"); 2383 2384 PetscFree(rowlengths); 2385 PetscFree(browlengths); 2386 PetscFree(aa); 2387 PetscFree(jj); 2388 PetscFree(mask); 2389 2390 B->assembled = PETSC_TRUE; 2391 2392 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 2393 if (flg) { 2394 Viewer tviewer; 2395 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2396 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO,0);CHKERRQ(ierr); 2397 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2398 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2399 } 2400 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 2401 if (flg) { 2402 Viewer tviewer; 2403 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2404 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO_LONG,0);CHKERRQ(ierr); 2405 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2406 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2407 } 2408 ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 2409 if (flg) { 2410 Viewer tviewer; 2411 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2412 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2413 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2414 } 2415 ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 2416 if (flg) { 2417 Viewer tviewer; 2418 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2419 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_MATLAB,"M");CHKERRQ(ierr); 2420 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2421 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2422 } 2423 ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 2424 if (flg) { 2425 Viewer tviewer; 2426 ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 2427 ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 2428 ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 2429 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2430 } 2431 return 0; 2432 } 2433 2434 2435 2436