1 #ifndef lint 2 static char vcid[] = "$Id: baij.c,v 1.98 1997/04/10 00:03:26 bsmith Exp curfman $"; 3 #endif 4 5 /* 6 Defines the basic matrix operations for the BAIJ (compressed row) 7 matrix storage format. 8 */ 9 10 #include "pinclude/pviewer.h" 11 #include "sys.h" 12 #include "src/mat/impls/baij/seq/baij.h" 13 #include "src/vec/vecimpl.h" 14 #include "src/inline/spops.h" 15 #include "petsc.h" 16 17 18 /* 19 Adds diagonal pointers to sparse matrix structure. 20 */ 21 22 #undef __FUNC__ 23 #define __FUNC__ "MatMarkDiag_SeqBAIJ" 24 int MatMarkDiag_SeqBAIJ(Mat A) 25 { 26 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 27 int i,j, *diag, m = a->mbs; 28 29 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 30 PLogObjectMemory(A,(m+1)*sizeof(int)); 31 for ( i=0; i<m; i++ ) { 32 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 33 if (a->j[j] == i) { 34 diag[i] = j; 35 break; 36 } 37 } 38 } 39 a->diag = diag; 40 return 0; 41 } 42 43 44 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 45 46 #undef __FUNC__ 47 #define __FUNC__ "MatGetRowIJ_SeqBAIJ" 48 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 49 PetscTruth *done) 50 { 51 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 52 int ierr, n = a->mbs,i; 53 54 *nn = n; 55 if (!ia) return 0; 56 if (symmetric) { 57 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 58 } else if (oshift == 1) { 59 /* temporarily add 1 to i and j indices */ 60 int nz = a->i[n] + 1; 61 for ( i=0; i<nz; i++ ) a->j[i]++; 62 for ( i=0; i<n+1; i++ ) a->i[i]++; 63 *ia = a->i; *ja = a->j; 64 } else { 65 *ia = a->i; *ja = a->j; 66 } 67 68 return 0; 69 } 70 71 #undef __FUNC__ 72 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ" /* ADIC Ignore */ 73 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 74 PetscTruth *done) 75 { 76 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 77 int i,n = a->mbs; 78 79 if (!ia) return 0; 80 if (symmetric) { 81 PetscFree(*ia); 82 PetscFree(*ja); 83 } else if (oshift == 1) { 84 int nz = a->i[n]; 85 for ( i=0; i<nz; i++ ) a->j[i]--; 86 for ( i=0; i<n+1; i++ ) a->i[i]--; 87 } 88 return 0; 89 } 90 91 92 #undef __FUNC__ 93 #define __FUNC__ "MatView_SeqBAIJ_Binary" /* ADIC Ignore */ 94 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 95 { 96 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 97 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 98 Scalar *aa; 99 100 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 101 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 102 col_lens[0] = MAT_COOKIE; 103 104 col_lens[1] = a->m; 105 col_lens[2] = a->n; 106 col_lens[3] = a->nz*bs2; 107 108 /* store lengths of each row and write (including header) to file */ 109 count = 0; 110 for ( i=0; i<a->mbs; i++ ) { 111 for ( j=0; j<bs; j++ ) { 112 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 113 } 114 } 115 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 116 PetscFree(col_lens); 117 118 /* store column indices (zero start index) */ 119 jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) ); CHKPTRQ(jj); 120 count = 0; 121 for ( i=0; i<a->mbs; i++ ) { 122 for ( j=0; j<bs; j++ ) { 123 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 124 for ( l=0; l<bs; l++ ) { 125 jj[count++] = bs*a->j[k] + l; 126 } 127 } 128 } 129 } 130 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr); 131 PetscFree(jj); 132 133 /* store nonzero values */ 134 aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar)); CHKPTRQ(aa); 135 count = 0; 136 for ( i=0; i<a->mbs; i++ ) { 137 for ( j=0; j<bs; j++ ) { 138 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 139 for ( l=0; l<bs; l++ ) { 140 aa[count++] = a->a[bs2*k + l*bs + j]; 141 } 142 } 143 } 144 } 145 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 146 PetscFree(aa); 147 return 0; 148 } 149 150 #undef __FUNC__ 151 #define __FUNC__ "MatView_SeqBAIJ_ASCII" /* ADIC Ignore */ 152 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 153 { 154 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 155 int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 156 FILE *fd; 157 char *outputname; 158 159 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 160 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 161 ierr = ViewerGetFormat(viewer,&format); 162 if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) { 163 fprintf(fd," block size is %d\n",bs); 164 } 165 else if (format == VIEWER_FORMAT_ASCII_MATLAB) { 166 SETERRQ(1,0,"Matlab format not supported"); 167 } 168 else if (format == VIEWER_FORMAT_ASCII_COMMON) { 169 for ( i=0; i<a->mbs; i++ ) { 170 for ( j=0; j<bs; j++ ) { 171 fprintf(fd,"row %d:",i*bs+j); 172 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 173 for ( l=0; l<bs; l++ ) { 174 #if defined(PETSC_COMPLEX) 175 if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 176 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 177 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 178 else if (real(a->a[bs2*k + l*bs + j]) != 0.0) 179 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 180 #else 181 if (a->a[bs2*k + l*bs + j] != 0.0) 182 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 183 #endif 184 } 185 } 186 fprintf(fd,"\n"); 187 } 188 } 189 } 190 else { 191 for ( i=0; i<a->mbs; i++ ) { 192 for ( j=0; j<bs; j++ ) { 193 fprintf(fd,"row %d:",i*bs+j); 194 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 195 for ( l=0; l<bs; l++ ) { 196 #if defined(PETSC_COMPLEX) 197 if (imag(a->a[bs2*k + l*bs + j]) != 0.0) { 198 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 199 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 200 } 201 else { 202 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 203 } 204 #else 205 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 206 #endif 207 } 208 } 209 fprintf(fd,"\n"); 210 } 211 } 212 } 213 fflush(fd); 214 return 0; 215 } 216 217 #undef __FUNC__ 218 #define __FUNC__ "MatView_SeqBAIJ_Draw" /* ADIC Ignore */ 219 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer) 220 { 221 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 222 int row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2; 223 double xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r; 224 Scalar *aa; 225 Draw draw; 226 DrawButton button; 227 PetscTruth isnull; 228 229 ViewerDrawGetDraw(viewer,&draw); 230 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 231 232 xr = a->n; yr = a->m; h = yr/10.0; w = xr/10.0; 233 xr += w; yr += h; xl = -w; yl = -h; 234 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 235 /* loop over matrix elements drawing boxes */ 236 color = DRAW_BLUE; 237 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 238 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 239 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 240 x_l = a->j[j]*bs; x_r = x_l + 1.0; 241 aa = a->a + j*bs2; 242 for ( k=0; k<bs; k++ ) { 243 for ( l=0; l<bs; l++ ) { 244 if (PetscReal(*aa++) >= 0.) continue; 245 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 246 } 247 } 248 } 249 } 250 color = DRAW_CYAN; 251 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 252 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 253 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 254 x_l = a->j[j]*bs; x_r = x_l + 1.0; 255 aa = a->a + j*bs2; 256 for ( k=0; k<bs; k++ ) { 257 for ( l=0; l<bs; l++ ) { 258 if (PetscReal(*aa++) != 0.) continue; 259 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 260 } 261 } 262 } 263 } 264 265 color = DRAW_RED; 266 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 267 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 268 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 269 x_l = a->j[j]*bs; x_r = x_l + 1.0; 270 aa = a->a + j*bs2; 271 for ( k=0; k<bs; k++ ) { 272 for ( l=0; l<bs; l++ ) { 273 if (PetscReal(*aa++) <= 0.) continue; 274 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 275 } 276 } 277 } 278 } 279 280 DrawFlush(draw); 281 DrawGetPause(draw,&pause); 282 if (pause >= 0) { PetscSleep(pause); return 0;} 283 284 /* allow the matrix to zoom or shrink */ 285 ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 286 while (button != BUTTON_RIGHT) { 287 DrawClear(draw); 288 if (button == BUTTON_LEFT) scale = .5; 289 else if (button == BUTTON_CENTER) scale = 2.; 290 xl = scale*(xl + w - xc) + xc - w*scale; 291 xr = scale*(xr - w - xc) + xc + w*scale; 292 yl = scale*(yl + h - yc) + yc - h*scale; 293 yr = scale*(yr - h - yc) + yc + h*scale; 294 w *= scale; h *= scale; 295 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 296 color = DRAW_BLUE; 297 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 298 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 299 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 300 x_l = a->j[j]*bs; x_r = x_l + 1.0; 301 aa = a->a + j*bs2; 302 for ( k=0; k<bs; k++ ) { 303 for ( l=0; l<bs; l++ ) { 304 if (PetscReal(*aa++) >= 0.) continue; 305 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 306 } 307 } 308 } 309 } 310 color = DRAW_CYAN; 311 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 312 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 313 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 314 x_l = a->j[j]*bs; x_r = x_l + 1.0; 315 aa = a->a + j*bs2; 316 for ( k=0; k<bs; k++ ) { 317 for ( l=0; l<bs; l++ ) { 318 if (PetscReal(*aa++) != 0.) continue; 319 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 320 } 321 } 322 } 323 } 324 325 color = DRAW_RED; 326 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 327 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 328 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 329 x_l = a->j[j]*bs; x_r = x_l + 1.0; 330 aa = a->a + j*bs2; 331 for ( k=0; k<bs; k++ ) { 332 for ( l=0; l<bs; l++ ) { 333 if (PetscReal(*aa++) <= 0.) continue; 334 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 335 } 336 } 337 } 338 } 339 ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 340 } 341 return 0; 342 } 343 344 #undef __FUNC__ 345 #define __FUNC__ "MatView_SeqBAIJ" /* ADIC Ignore */ 346 int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 347 { 348 Mat A = (Mat) obj; 349 ViewerType vtype; 350 int ierr; 351 352 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 353 if (vtype == MATLAB_VIEWER) { 354 SETERRQ(1,0,"Matlab viewer not supported"); 355 } 356 else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 357 return MatView_SeqBAIJ_ASCII(A,viewer); 358 } 359 else if (vtype == BINARY_FILE_VIEWER) { 360 return MatView_SeqBAIJ_Binary(A,viewer); 361 } 362 else if (vtype == DRAW_VIEWER) { 363 return MatView_SeqBAIJ_Draw(A,viewer); 364 } 365 return 0; 366 } 367 368 #define CHUNKSIZE 10 369 370 #undef __FUNC__ 371 #define __FUNC__ "MatSetValues_SeqBAIJ" 372 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 373 { 374 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 375 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 376 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 377 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 378 int ridx,cidx,bs2=a->bs2; 379 Scalar *ap,value,*aa=a->a,*bap; 380 381 for ( k=0; k<m; k++ ) { /* loop over added rows */ 382 row = im[k]; brow = row/bs; 383 #if defined(PETSC_BOPT_g) 384 if (row < 0) SETERRQ(1,0,"Negative row"); 385 if (row >= a->m) SETERRQ(1,0,"Row too large"); 386 #endif 387 rp = aj + ai[brow]; 388 ap = aa + bs2*ai[brow]; 389 rmax = imax[brow]; 390 nrow = ailen[brow]; 391 low = 0; 392 for ( l=0; l<n; l++ ) { /* loop over added columns */ 393 #if defined(PETSC_BOPT_g) 394 if (in[l] < 0) SETERRQ(1,0,"Negative column"); 395 if (in[l] >= a->n) SETERRQ(1,0,"Column too large"); 396 #endif 397 col = in[l]; bcol = col/bs; 398 ridx = row % bs; cidx = col % bs; 399 if (roworiented) { 400 value = *v++; 401 } 402 else { 403 value = v[k + l*m]; 404 } 405 if (!sorted) low = 0; high = nrow; 406 while (high-low > 7) { 407 t = (low+high)/2; 408 if (rp[t] > bcol) high = t; 409 else low = t; 410 } 411 for ( i=low; i<high; i++ ) { 412 if (rp[i] > bcol) break; 413 if (rp[i] == bcol) { 414 bap = ap + bs2*i + bs*cidx + ridx; 415 if (is == ADD_VALUES) *bap += value; 416 else *bap = value; 417 goto noinsert1; 418 } 419 } 420 if (nonew == 1) goto noinsert1; 421 else if (nonew == -1) SETERRQ(1,1,"Inserting a new nonzero in the matrix"); 422 if (nrow >= rmax) { 423 /* there is no extra room in row, therefore enlarge */ 424 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 425 Scalar *new_a; 426 427 if (nonew == -2) SETERRQ(1,1,"Inserting a new nonzero in the matrix"); 428 429 /* Malloc new storage space */ 430 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 431 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 432 new_j = (int *) (new_a + bs2*new_nz); 433 new_i = new_j + new_nz; 434 435 /* copy over old data into new slots */ 436 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 437 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 438 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); 439 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 440 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, 441 len*sizeof(int)); 442 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); 443 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 444 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), 445 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); 446 /* free up old matrix storage */ 447 PetscFree(a->a); 448 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 449 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 450 a->singlemalloc = 1; 451 452 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 453 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 454 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 455 a->maxnz += bs2*CHUNKSIZE; 456 a->reallocs++; 457 a->nz++; 458 } 459 N = nrow++ - 1; 460 /* shift up all the later entries in this row */ 461 for ( ii=N; ii>=i; ii-- ) { 462 rp[ii+1] = rp[ii]; 463 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 464 } 465 if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 466 rp[i] = bcol; 467 ap[bs2*i + bs*cidx + ridx] = value; 468 noinsert1:; 469 low = i; 470 } 471 ailen[brow] = nrow; 472 } 473 return 0; 474 } 475 476 #undef __FUNC__ 477 #define __FUNC__ "MatSetValues_SeqBAIJ" 478 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 479 { 480 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 481 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 482 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 483 int roworiented=a->roworiented; 484 int *aj=a->j,nonew=a->nonew; 485 int bs2=a->bs2,bs=a->bs,stepval; 486 Scalar *ap,*value=v,*aa=a->a,*bap; 487 488 if (roworiented) { 489 stepval = (n-1)*bs; 490 } else { 491 stepval = (m-1)*bs; 492 } 493 for ( k=0; k<m; k++ ) { /* loop over added rows */ 494 row = im[k]; 495 #if defined(PETSC_BOPT_g) 496 if (row < 0) SETERRQ(1,0,"Negative row"); 497 if (row >= a->mbs) SETERRQ(1,0,"Row too large"); 498 #endif 499 rp = aj + ai[row]; 500 ap = aa + bs2*ai[row]; 501 rmax = imax[row]; 502 nrow = ailen[row]; 503 low = 0; 504 for ( l=0; l<n; l++ ) { /* loop over added columns */ 505 #if defined(PETSC_BOPT_g) 506 if (in[l] < 0) SETERRQ(1,0,"Negative column"); 507 if (in[l] >= a->nbs) SETERRQ(1,0,"Column too large"); 508 #endif 509 col = in[l]; 510 if (roworiented) { 511 value = v + k*(stepval+bs)*bs + l*bs; 512 } else { 513 value = v + l*(stepval+bs)*bs + k*bs; 514 } 515 if (!sorted) low = 0; high = nrow; 516 while (high-low > 7) { 517 t = (low+high)/2; 518 if (rp[t] > col) high = t; 519 else low = t; 520 } 521 for ( i=low; i<high; i++ ) { 522 if (rp[i] > col) break; 523 if (rp[i] == col) { 524 bap = ap + bs2*i; 525 if (roworiented) { 526 if (is == ADD_VALUES) { 527 for ( ii=0; ii<bs; ii++,value+=stepval ) 528 for (jj=ii; jj<bs2; jj+=bs ) 529 bap[jj] += *value++; 530 } else { 531 for ( ii=0; ii<bs; ii++,value+=stepval ) 532 for (jj=ii; jj<bs2; jj+=bs ) 533 bap[jj] = *value++; 534 } 535 } else { 536 if (is == ADD_VALUES) { 537 for ( ii=0; ii<bs; ii++,value+=stepval ) 538 for (jj=0; jj<bs; jj++ ) 539 *bap++ += *value++; 540 } else { 541 for ( ii=0; ii<bs; ii++,value+=stepval ) 542 for (jj=0; jj<bs; jj++ ) 543 *bap++ = *value++; 544 } 545 } 546 goto noinsert2; 547 } 548 } 549 if (nonew) goto noinsert2; 550 if (nrow >= rmax) { 551 /* there is no extra room in row, therefore enlarge */ 552 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 553 Scalar *new_a; 554 555 /* malloc new storage space */ 556 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 557 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 558 new_j = (int *) (new_a + bs2*new_nz); 559 new_i = new_j + new_nz; 560 561 /* copy over old data into new slots */ 562 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} 563 for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 564 PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int)); 565 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 566 PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow, 567 len*sizeof(int)); 568 PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(Scalar)); 569 PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 570 PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE), 571 aa+bs2*(ai[row]+nrow),bs2*len*sizeof(Scalar)); 572 /* free up old matrix storage */ 573 PetscFree(a->a); 574 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 575 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 576 a->singlemalloc = 1; 577 578 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 579 rmax = imax[row] = imax[row] + CHUNKSIZE; 580 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 581 a->maxnz += bs2*CHUNKSIZE; 582 a->reallocs++; 583 a->nz++; 584 } 585 N = nrow++ - 1; 586 /* shift up all the later entries in this row */ 587 for ( ii=N; ii>=i; ii-- ) { 588 rp[ii+1] = rp[ii]; 589 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 590 } 591 if (N >= i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 592 rp[i] = col; 593 bap = ap + bs2*i; 594 if (roworiented) { 595 for ( ii=0; ii<bs; ii++,value+=stepval) 596 for (jj=ii; jj<bs2; jj+=bs ) 597 bap[jj] = *value++; 598 } else { 599 for ( ii=0; ii<bs; ii++,value+=stepval ) 600 for (jj=0; jj<bs; jj++ ) 601 *bap++ = *value++; 602 } 603 noinsert2:; 604 low = i; 605 } 606 ailen[row] = nrow; 607 } 608 return 0; 609 } 610 611 #undef __FUNC__ 612 #define __FUNC__ "MatGetSize_SeqBAIJ" /* ADIC Ignore */ 613 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 614 { 615 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 616 *m = a->m; *n = a->n; 617 return 0; 618 } 619 620 #undef __FUNC__ 621 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ" /* ADIC Ignore */ 622 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 623 { 624 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 625 *m = 0; *n = a->m; 626 return 0; 627 } 628 629 #undef __FUNC__ 630 #define __FUNC__ "MatGetRow_SeqBAIJ" 631 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 632 { 633 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 634 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 635 Scalar *aa,*v_i,*aa_i; 636 637 bs = a->bs; 638 ai = a->i; 639 aj = a->j; 640 aa = a->a; 641 bs2 = a->bs2; 642 643 if (row < 0 || row >= a->m) SETERRQ(1,0,"Row out of range"); 644 645 bn = row/bs; /* Block number */ 646 bp = row % bs; /* Block Position */ 647 M = ai[bn+1] - ai[bn]; 648 *nz = bs*M; 649 650 if (v) { 651 *v = 0; 652 if (*nz) { 653 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 654 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 655 v_i = *v + i*bs; 656 aa_i = aa + bs2*(ai[bn] + i); 657 for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 658 } 659 } 660 } 661 662 if (idx) { 663 *idx = 0; 664 if (*nz) { 665 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 666 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 667 idx_i = *idx + i*bs; 668 itmp = bs*aj[ai[bn] + i]; 669 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 670 } 671 } 672 } 673 return 0; 674 } 675 676 #undef __FUNC__ 677 #define __FUNC__ "MatRestoreRow_SeqBAIJ" /* ADIC Ignore */ 678 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 679 { 680 if (idx) {if (*idx) PetscFree(*idx);} 681 if (v) {if (*v) PetscFree(*v);} 682 return 0; 683 } 684 685 #undef __FUNC__ 686 #define __FUNC__ "MatTranspose_SeqBAIJ" 687 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 688 { 689 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 690 Mat C; 691 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 692 int *rows,*cols,bs2=a->bs2; 693 Scalar *array=a->a; 694 695 if (B==PETSC_NULL && mbs!=nbs) 696 SETERRQ(1,0,"Square matrix only for in-place"); 697 col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 698 PetscMemzero(col,(1+nbs)*sizeof(int)); 699 700 for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 701 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 702 PetscFree(col); 703 rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 704 cols = rows + bs; 705 for ( i=0; i<mbs; i++ ) { 706 cols[0] = i*bs; 707 for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 708 len = ai[i+1] - ai[i]; 709 for ( j=0; j<len; j++ ) { 710 rows[0] = (*aj++)*bs; 711 for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 712 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 713 array += bs2; 714 } 715 } 716 PetscFree(rows); 717 718 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 719 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 720 721 if (B != PETSC_NULL) { 722 *B = C; 723 } else { 724 /* This isn't really an in-place transpose */ 725 PetscFree(a->a); 726 if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 727 if (a->diag) PetscFree(a->diag); 728 if (a->ilen) PetscFree(a->ilen); 729 if (a->imax) PetscFree(a->imax); 730 if (a->solve_work) PetscFree(a->solve_work); 731 PetscFree(a); 732 PetscMemcpy(A,C,sizeof(struct _Mat)); 733 PetscHeaderDestroy(C); 734 } 735 return 0; 736 } 737 738 739 #undef __FUNC__ 740 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ" 741 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 742 { 743 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 744 int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 745 int m = a->m,*ip, N, *ailen = a->ilen; 746 int mbs = a->mbs, bs2 = a->bs2,rmax; 747 Scalar *aa = a->a, *ap; 748 749 if (mode == MAT_FLUSH_ASSEMBLY) return 0; 750 751 rmax = ailen[0]; 752 for ( i=1; i<mbs; i++ ) { 753 /* move each row back by the amount of empty slots (fshift) before it*/ 754 fshift += imax[i-1] - ailen[i-1]; 755 rmax = PetscMax(rmax,ailen[i]); 756 if (fshift) { 757 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 758 N = ailen[i]; 759 for ( j=0; j<N; j++ ) { 760 ip[j-fshift] = ip[j]; 761 PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 762 } 763 } 764 ai[i] = ai[i-1] + ailen[i-1]; 765 } 766 if (mbs) { 767 fshift += imax[mbs-1] - ailen[mbs-1]; 768 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 769 } 770 /* reset ilen and imax for each row */ 771 for ( i=0; i<mbs; i++ ) { 772 ailen[i] = imax[i] = ai[i+1] - ai[i]; 773 } 774 a->nz = ai[mbs]; 775 776 /* diagonals may have moved, so kill the diagonal pointers */ 777 if (fshift && a->diag) { 778 PetscFree(a->diag); 779 PLogObjectMemory(A,-(m+1)*sizeof(int)); 780 a->diag = 0; 781 } 782 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 783 m,a->n,a->bs,fshift*bs2,a->nz*bs2); 784 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 785 a->reallocs); 786 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 787 a->reallocs = 0; 788 A->info.nz_unneeded = (double)fshift*bs2; 789 790 return 0; 791 } 792 793 #undef __FUNC__ 794 #define __FUNC__ "MatZeroEntries_SeqBAIJ" 795 int MatZeroEntries_SeqBAIJ(Mat A) 796 { 797 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 798 PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar)); 799 return 0; 800 } 801 802 #undef __FUNC__ 803 #define __FUNC__ "MatDestroy_SeqBAIJ" /* ADIC Ignore */ 804 int MatDestroy_SeqBAIJ(PetscObject obj) 805 { 806 Mat A = (Mat) obj; 807 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 808 int ierr; 809 810 #if defined(PETSC_LOG) 811 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 812 #endif 813 PetscFree(a->a); 814 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 815 if (a->diag) PetscFree(a->diag); 816 if (a->ilen) PetscFree(a->ilen); 817 if (a->imax) PetscFree(a->imax); 818 if (a->solve_work) PetscFree(a->solve_work); 819 if (a->mult_work) PetscFree(a->mult_work); 820 PetscFree(a); 821 if (A->mapping) { 822 ierr = ISLocalToGlobalMappingDestroy(A->mapping); CHKERRQ(ierr); 823 } 824 PLogObjectDestroy(A); 825 PetscHeaderDestroy(A); 826 return 0; 827 } 828 829 #undef __FUNC__ 830 #define __FUNC__ "MatSetOption_SeqBAIJ" /* ADIC Ignore */ 831 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 832 { 833 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 834 if (op == MAT_ROW_ORIENTED) a->roworiented = 1; 835 else if (op == MAT_COLUMN_ORIENTED) a->roworiented = 0; 836 else if (op == MAT_COLUMNS_SORTED) a->sorted = 1; 837 else if (op == MAT_COLUMNS_UNSORTED) a->sorted = 0; 838 else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 839 else if (op == MAT_NEW_NONZERO_LOCATION_ERROR) a->nonew = -1; 840 else if (op == MAT_NEW_NONZERO_ALLOCATION_ERROR) a->nonew = -2; 841 else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 842 else if (op == MAT_ROWS_SORTED || 843 op == MAT_ROWS_UNSORTED || 844 op == MAT_SYMMETRIC || 845 op == MAT_STRUCTURALLY_SYMMETRIC || 846 op == MAT_YES_NEW_DIAGONALS || 847 op == MAT_IGNORE_OFF_PROC_ENTRIES) 848 PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 849 else if (op == MAT_NO_NEW_DIAGONALS) 850 {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");} 851 else 852 {SETERRQ(PETSC_ERR_SUP,0,"unknown option");} 853 return 0; 854 } 855 856 857 /* -------------------------------------------------------*/ 858 /* Should check that shapes of vectors and matrices match */ 859 /* -------------------------------------------------------*/ 860 #include "pinclude/plapack.h" 861 862 #undef __FUNC__ 863 #define __FUNC__ "MatMult_SeqBAIJ_1" 864 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz) 865 { 866 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 867 register Scalar *x,*z,*v,sum; 868 int mbs=a->mbs,i,*idx,*ii,n; 869 870 VecGetArray_Fast(xx,x); 871 VecGetArray_Fast(zz,z); 872 873 idx = a->j; 874 v = a->a; 875 ii = a->i; 876 877 for ( i=0; i<mbs; i++ ) { 878 n = ii[1] - ii[0]; ii++; 879 sum = 0.0; 880 while (n--) sum += *v++ * x[*idx++]; 881 z[i] = sum; 882 } 883 VecRestoreArray_Fast(xx,x); 884 VecRestoreArray_Fast(zz,z); 885 PLogFlops(2*a->nz - a->m); 886 return 0; 887 } 888 889 #undef __FUNC__ 890 #define __FUNC__ "MatMult_SeqBAIJ_2" 891 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz) 892 { 893 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 894 register Scalar *x,*z,*v,*xb,sum1,sum2; 895 register Scalar x1,x2; 896 int mbs=a->mbs,i,*idx,*ii; 897 int j,n; 898 899 VecGetArray_Fast(xx,x); 900 VecGetArray_Fast(zz,z); 901 902 idx = a->j; 903 v = a->a; 904 ii = a->i; 905 906 for ( i=0; i<mbs; i++ ) { 907 n = ii[1] - ii[0]; ii++; 908 sum1 = 0.0; sum2 = 0.0; 909 for ( j=0; j<n; j++ ) { 910 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 911 sum1 += v[0]*x1 + v[2]*x2; 912 sum2 += v[1]*x1 + v[3]*x2; 913 v += 4; 914 } 915 z[0] = sum1; z[1] = sum2; 916 z += 2; 917 } 918 VecRestoreArray_Fast(xx,x); 919 VecRestoreArray_Fast(zz,z); 920 PLogFlops(4*a->nz - a->m); 921 return 0; 922 } 923 924 #undef __FUNC__ 925 #define __FUNC__ "MatMult_SeqBAIJ_3" 926 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz) 927 { 928 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 929 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 930 int mbs=a->mbs,i,*idx,*ii,j,n; 931 932 VecGetArray_Fast(xx,x); 933 VecGetArray_Fast(zz,z); 934 935 idx = a->j; 936 v = a->a; 937 ii = a->i; 938 939 for ( i=0; i<mbs; i++ ) { 940 n = ii[1] - ii[0]; ii++; 941 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 942 for ( j=0; j<n; j++ ) { 943 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 944 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 945 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 946 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 947 v += 9; 948 } 949 z[0] = sum1; z[1] = sum2; z[2] = sum3; 950 z += 3; 951 } 952 VecRestoreArray_Fast(xx,x); 953 VecRestoreArray_Fast(zz,z); 954 PLogFlops(18*a->nz - a->m); 955 return 0; 956 } 957 958 #undef __FUNC__ 959 #define __FUNC__ "MatMult_SeqBAIJ_4" 960 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz) 961 { 962 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 963 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4; 964 register Scalar x1,x2,x3,x4; 965 int mbs=a->mbs,i,*idx,*ii; 966 int j,n; 967 968 VecGetArray_Fast(xx,x); 969 VecGetArray_Fast(zz,z); 970 971 idx = a->j; 972 v = a->a; 973 ii = a->i; 974 975 for ( i=0; i<mbs; i++ ) { 976 n = ii[1] - ii[0]; ii++; 977 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 978 for ( j=0; j<n; j++ ) { 979 xb = x + 4*(*idx++); 980 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 981 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 982 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 983 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 984 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 985 v += 16; 986 } 987 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 988 z += 4; 989 } 990 VecRestoreArray_Fast(xx,x); 991 VecRestoreArray_Fast(zz,z); 992 PLogFlops(32*a->nz - a->m); 993 return 0; 994 } 995 996 #undef __FUNC__ 997 #define __FUNC__ "MatMult_SeqBAIJ_5" 998 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz) 999 { 1000 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1001 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 1002 register Scalar x1,x2,x3,x4,x5; 1003 int mbs=a->mbs,i,*idx,*ii,j,n; 1004 1005 VecGetArray_Fast(xx,x); 1006 VecGetArray_Fast(zz,z); 1007 1008 idx = a->j; 1009 v = a->a; 1010 ii = a->i; 1011 1012 for ( i=0; i<mbs; i++ ) { 1013 n = ii[1] - ii[0]; ii++; 1014 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 1015 for ( j=0; j<n; j++ ) { 1016 xb = x + 5*(*idx++); 1017 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 1018 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1019 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1020 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1021 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1022 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 1023 v += 25; 1024 } 1025 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 1026 z += 5; 1027 } 1028 VecRestoreArray_Fast(xx,x); 1029 VecRestoreArray_Fast(zz,z); 1030 PLogFlops(50*a->nz - a->m); 1031 return 0; 1032 } 1033 1034 #undef __FUNC__ 1035 #define __FUNC__ "MatMult_SeqBAIJ_7" 1036 static int MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz) 1037 { 1038 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1039 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 1040 register Scalar x1,x2,x3,x4,x5,x6,x7; 1041 int mbs=a->mbs,i,*idx,*ii,j,n; 1042 1043 VecGetArray_Fast(xx,x); 1044 VecGetArray_Fast(zz,z); 1045 1046 idx = a->j; 1047 v = a->a; 1048 ii = a->i; 1049 1050 for ( i=0; i<mbs; i++ ) { 1051 n = ii[1] - ii[0]; ii++; 1052 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0; 1053 for ( j=0; j<n; j++ ) { 1054 xb = x + 7*(*idx++); 1055 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 1056 sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 1057 sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 1058 sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 1059 sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 1060 sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 1061 sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 1062 sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 1063 v += 49; 1064 } 1065 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 1066 z += 7; 1067 } 1068 1069 VecRestoreArray_Fast(xx,x); 1070 VecRestoreArray_Fast(zz,z); 1071 PLogFlops(98*a->nz - a->m); 1072 return 0; 1073 } 1074 1075 #undef __FUNC__ 1076 #define __FUNC__ "MatMult_SeqBAIJ_N" 1077 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz) 1078 { 1079 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1080 register Scalar *x,*z,*v,*xb; 1081 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2; 1082 int _One = 1,ncols,k; 1083 Scalar _DOne = 1.0,*work,*workt,_DZero = 0.0; 1084 1085 VecGetArray_Fast(xx,x); 1086 VecGetArray_Fast(zz,z); 1087 1088 idx = a->j; 1089 v = a->a; 1090 ii = a->i; 1091 1092 1093 if (!a->mult_work) { 1094 k = PetscMax(a->m,a->n); 1095 a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work); 1096 } 1097 work = a->mult_work; 1098 for ( i=0; i<mbs; i++ ) { 1099 n = ii[1] - ii[0]; ii++; 1100 ncols = n*bs; 1101 workt = work; 1102 for ( j=0; j<n; j++ ) { 1103 xb = x + bs*(*idx++); 1104 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1105 workt += bs; 1106 } 1107 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); 1108 v += n*bs2; 1109 z += bs; 1110 } 1111 VecRestoreArray_Fast(xx,x); 1112 VecRestoreArray_Fast(zz,z); 1113 PLogFlops(2*a->nz*bs2 - a->m); 1114 return 0; 1115 } 1116 1117 #undef __FUNC__ 1118 #define __FUNC__ "MatMultAdd_SeqBAIJ_1" 1119 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 1120 { 1121 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1122 register Scalar *x,*y,*z,*v,sum; 1123 int mbs=a->mbs,i,*idx,*ii,n; 1124 1125 VecGetArray_Fast(xx,x); 1126 VecGetArray_Fast(yy,y); 1127 VecGetArray_Fast(zz,z); 1128 1129 idx = a->j; 1130 v = a->a; 1131 ii = a->i; 1132 1133 for ( i=0; i<mbs; i++ ) { 1134 n = ii[1] - ii[0]; ii++; 1135 sum = y[i]; 1136 while (n--) sum += *v++ * x[*idx++]; 1137 z[i] = sum; 1138 } 1139 VecRestoreArray_Fast(xx,x); 1140 VecRestoreArray_Fast(yy,y); 1141 VecRestoreArray_Fast(zz,z); 1142 PLogFlops(2*a->nz); 1143 return 0; 1144 } 1145 1146 #undef __FUNC__ 1147 #define __FUNC__ "MatMultAdd_SeqBAIJ_2" 1148 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 1149 { 1150 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1151 register Scalar *x,*y,*z,*v,*xb,sum1,sum2; 1152 register Scalar x1,x2; 1153 int mbs=a->mbs,i,*idx,*ii; 1154 int j,n; 1155 1156 VecGetArray_Fast(xx,x); 1157 VecGetArray_Fast(yy,y); 1158 VecGetArray_Fast(zz,z); 1159 1160 idx = a->j; 1161 v = a->a; 1162 ii = a->i; 1163 1164 for ( i=0; i<mbs; i++ ) { 1165 n = ii[1] - ii[0]; ii++; 1166 sum1 = y[0]; sum2 = y[1]; 1167 for ( j=0; j<n; j++ ) { 1168 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 1169 sum1 += v[0]*x1 + v[2]*x2; 1170 sum2 += v[1]*x1 + v[3]*x2; 1171 v += 4; 1172 } 1173 z[0] = sum1; z[1] = sum2; 1174 z += 2; y += 2; 1175 } 1176 VecRestoreArray_Fast(xx,x); 1177 VecRestoreArray_Fast(yy,y); 1178 VecRestoreArray_Fast(zz,z); 1179 PLogFlops(4*a->nz); 1180 return 0; 1181 } 1182 1183 #undef __FUNC__ 1184 #define __FUNC__ "MatMultAdd_SeqBAIJ_3" 1185 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 1186 { 1187 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1188 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 1189 int mbs=a->mbs,i,*idx,*ii,j,n; 1190 1191 VecGetArray_Fast(xx,x); 1192 VecGetArray_Fast(yy,y); 1193 VecGetArray_Fast(zz,z); 1194 1195 idx = a->j; 1196 v = a->a; 1197 ii = a->i; 1198 1199 for ( i=0; i<mbs; i++ ) { 1200 n = ii[1] - ii[0]; ii++; 1201 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; 1202 for ( j=0; j<n; j++ ) { 1203 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1204 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 1205 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 1206 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 1207 v += 9; 1208 } 1209 z[0] = sum1; z[1] = sum2; z[2] = sum3; 1210 z += 3; y += 3; 1211 } 1212 VecRestoreArray_Fast(xx,x); 1213 VecRestoreArray_Fast(yy,y); 1214 VecRestoreArray_Fast(zz,z); 1215 PLogFlops(18*a->nz); 1216 return 0; 1217 } 1218 1219 #undef __FUNC__ 1220 #define __FUNC__ "MatMultAdd_SeqBAIJ_4" 1221 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 1222 { 1223 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1224 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4; 1225 register Scalar x1,x2,x3,x4; 1226 int mbs=a->mbs,i,*idx,*ii; 1227 int j,n; 1228 1229 VecGetArray_Fast(xx,x); 1230 VecGetArray_Fast(yy,y); 1231 VecGetArray_Fast(zz,z); 1232 1233 idx = a->j; 1234 v = a->a; 1235 ii = a->i; 1236 1237 for ( i=0; i<mbs; i++ ) { 1238 n = ii[1] - ii[0]; ii++; 1239 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; 1240 for ( j=0; j<n; j++ ) { 1241 xb = x + 4*(*idx++); 1242 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1243 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 1244 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 1245 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 1246 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 1247 v += 16; 1248 } 1249 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 1250 z += 4; y += 4; 1251 } 1252 VecRestoreArray_Fast(xx,x); 1253 VecRestoreArray_Fast(yy,y); 1254 VecRestoreArray_Fast(zz,z); 1255 PLogFlops(32*a->nz); 1256 return 0; 1257 } 1258 1259 #undef __FUNC__ 1260 #define __FUNC__ "MatMultAdd_SeqBAIJ_5" 1261 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 1262 { 1263 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1264 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 1265 register Scalar x1,x2,x3,x4,x5; 1266 int mbs=a->mbs,i,*idx,*ii,j,n; 1267 1268 VecGetArray_Fast(xx,x); 1269 VecGetArray_Fast(yy,y); 1270 VecGetArray_Fast(zz,z); 1271 1272 idx = a->j; 1273 v = a->a; 1274 ii = a->i; 1275 1276 for ( i=0; i<mbs; i++ ) { 1277 n = ii[1] - ii[0]; ii++; 1278 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; 1279 for ( j=0; j<n; j++ ) { 1280 xb = x + 5*(*idx++); 1281 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 1282 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1283 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1284 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1285 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1286 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 1287 v += 25; 1288 } 1289 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 1290 z += 5; y += 5; 1291 } 1292 VecRestoreArray_Fast(xx,x); 1293 VecRestoreArray_Fast(yy,y); 1294 VecRestoreArray_Fast(zz,z); 1295 PLogFlops(50*a->nz); 1296 return 0; 1297 } 1298 1299 #undef __FUNC__ 1300 #define __FUNC__ "MatMultAdd_SeqBAIJ_7" 1301 static int MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1302 { 1303 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1304 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 1305 register Scalar x1,x2,x3,x4,x5,x6,x7; 1306 int mbs=a->mbs,i,*idx,*ii,j,n; 1307 1308 VecGetArray_Fast(xx,x); 1309 VecGetArray_Fast(yy,y); 1310 VecGetArray_Fast(zz,z); 1311 1312 idx = a->j; 1313 v = a->a; 1314 ii = a->i; 1315 1316 for ( i=0; i<mbs; i++ ) { 1317 n = ii[1] - ii[0]; ii++; 1318 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6]; 1319 for ( j=0; j<n; j++ ) { 1320 xb = x + 7*(*idx++); 1321 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 1322 sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 1323 sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 1324 sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 1325 sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 1326 sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 1327 sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 1328 sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 1329 v += 49; 1330 } 1331 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 1332 z += 7; y += 7; 1333 } 1334 VecRestoreArray_Fast(xx,x); 1335 VecRestoreArray_Fast(yy,y); 1336 VecRestoreArray_Fast(zz,z); 1337 PLogFlops(98*a->nz); 1338 return 0; 1339 } 1340 1341 1342 #undef __FUNC__ 1343 #define __FUNC__ "MatMultAdd_SeqBAIJ_N" 1344 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 1345 { 1346 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1347 register Scalar *x,*z,*v,*xb; 1348 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr; 1349 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1350 1351 if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1352 1353 VecGetArray_Fast(xx,x); 1354 VecGetArray_Fast(zz,z); 1355 1356 idx = a->j; 1357 v = a->a; 1358 ii = a->i; 1359 1360 1361 if (!a->mult_work) { 1362 k = PetscMax(a->m,a->n); 1363 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work); 1364 } 1365 work = a->mult_work; 1366 for ( i=0; i<mbs; i++ ) { 1367 n = ii[1] - ii[0]; ii++; 1368 ncols = n*bs; 1369 workt = work; 1370 for ( j=0; j<n; j++ ) { 1371 xb = x + bs*(*idx++); 1372 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1373 workt += bs; 1374 } 1375 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); 1376 v += n*bs2; 1377 z += bs; 1378 } 1379 VecRestoreArray_Fast(xx,x); 1380 VecRestoreArray_Fast(zz,z); 1381 PLogFlops(2*a->nz*bs2 ); 1382 return 0; 1383 } 1384 1385 #undef __FUNC__ 1386 #define __FUNC__ "MatMultTrans_SeqBAIJ" 1387 int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz) 1388 { 1389 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1390 Scalar *xg,*zg,*zb; 1391 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1392 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1393 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1394 1395 1396 VecGetArray_Fast(xx,xg); x = xg; 1397 VecGetArray_Fast(zz,zg); z = zg; 1398 PetscMemzero(z,N*sizeof(Scalar)); 1399 1400 idx = a->j; 1401 v = a->a; 1402 ii = a->i; 1403 1404 switch (bs) { 1405 case 1: 1406 for ( i=0; i<mbs; i++ ) { 1407 n = ii[1] - ii[0]; ii++; 1408 xb = x + i; x1 = xb[0]; 1409 ib = idx + ai[i]; 1410 for ( j=0; j<n; j++ ) { 1411 rval = ib[j]; 1412 z[rval] += *v++ * x1; 1413 } 1414 } 1415 break; 1416 case 2: 1417 for ( i=0; i<mbs; i++ ) { 1418 n = ii[1] - ii[0]; ii++; 1419 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1420 ib = idx + ai[i]; 1421 for ( j=0; j<n; j++ ) { 1422 rval = ib[j]*2; 1423 z[rval++] += v[0]*x1 + v[1]*x2; 1424 z[rval++] += v[2]*x1 + v[3]*x2; 1425 v += 4; 1426 } 1427 } 1428 break; 1429 case 3: 1430 for ( i=0; i<mbs; i++ ) { 1431 n = ii[1] - ii[0]; ii++; 1432 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1433 ib = idx + ai[i]; 1434 for ( j=0; j<n; j++ ) { 1435 rval = ib[j]*3; 1436 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1437 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1438 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1439 v += 9; 1440 } 1441 } 1442 break; 1443 case 4: 1444 for ( i=0; i<mbs; i++ ) { 1445 n = ii[1] - ii[0]; ii++; 1446 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1447 ib = idx + ai[i]; 1448 for ( j=0; j<n; j++ ) { 1449 rval = ib[j]*4; 1450 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1451 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1452 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1453 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1454 v += 16; 1455 } 1456 } 1457 break; 1458 case 5: 1459 for ( i=0; i<mbs; i++ ) { 1460 n = ii[1] - ii[0]; ii++; 1461 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1462 x4 = xb[3]; x5 = xb[4]; 1463 ib = idx + ai[i]; 1464 for ( j=0; j<n; j++ ) { 1465 rval = ib[j]*5; 1466 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1467 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1468 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1469 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1470 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1471 v += 25; 1472 } 1473 } 1474 break; 1475 /* block sizes larger then 5 by 5 are handled by BLAS */ 1476 default: { 1477 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1478 if (!a->mult_work) { 1479 k = PetscMax(a->m,a->n); 1480 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1481 CHKPTRQ(a->mult_work); 1482 } 1483 work = a->mult_work; 1484 for ( i=0; i<mbs; i++ ) { 1485 n = ii[1] - ii[0]; ii++; 1486 ncols = n*bs; 1487 PetscMemzero(work,ncols*sizeof(Scalar)); 1488 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1489 v += n*bs2; 1490 x += bs; 1491 workt = work; 1492 for ( j=0; j<n; j++ ) { 1493 zb = z + bs*(*idx++); 1494 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1495 workt += bs; 1496 } 1497 } 1498 } 1499 } 1500 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1501 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1502 PLogFlops(2*a->nz*a->bs2 - a->n); 1503 return 0; 1504 } 1505 1506 #undef __FUNC__ 1507 #define __FUNC__ "MatMultTransAdd_SeqBAIJ" 1508 int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1509 { 1510 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1511 Scalar *xg,*zg,*zb; 1512 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1513 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1514 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1515 1516 1517 1518 VecGetArray_Fast(xx,xg); x = xg; 1519 VecGetArray_Fast(zz,zg); z = zg; 1520 1521 if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1522 else PetscMemzero(z,N*sizeof(Scalar)); 1523 1524 1525 idx = a->j; 1526 v = a->a; 1527 ii = a->i; 1528 1529 switch (bs) { 1530 case 1: 1531 for ( i=0; i<mbs; i++ ) { 1532 n = ii[1] - ii[0]; ii++; 1533 xb = x + i; x1 = xb[0]; 1534 ib = idx + ai[i]; 1535 for ( j=0; j<n; j++ ) { 1536 rval = ib[j]; 1537 z[rval] += *v++ * x1; 1538 } 1539 } 1540 break; 1541 case 2: 1542 for ( i=0; i<mbs; i++ ) { 1543 n = ii[1] - ii[0]; ii++; 1544 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1545 ib = idx + ai[i]; 1546 for ( j=0; j<n; j++ ) { 1547 rval = ib[j]*2; 1548 z[rval++] += v[0]*x1 + v[1]*x2; 1549 z[rval++] += v[2]*x1 + v[3]*x2; 1550 v += 4; 1551 } 1552 } 1553 break; 1554 case 3: 1555 for ( i=0; i<mbs; i++ ) { 1556 n = ii[1] - ii[0]; ii++; 1557 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1558 ib = idx + ai[i]; 1559 for ( j=0; j<n; j++ ) { 1560 rval = ib[j]*3; 1561 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1562 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1563 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1564 v += 9; 1565 } 1566 } 1567 break; 1568 case 4: 1569 for ( i=0; i<mbs; i++ ) { 1570 n = ii[1] - ii[0]; ii++; 1571 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1572 ib = idx + ai[i]; 1573 for ( j=0; j<n; j++ ) { 1574 rval = ib[j]*4; 1575 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1576 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1577 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1578 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1579 v += 16; 1580 } 1581 } 1582 break; 1583 case 5: 1584 for ( i=0; i<mbs; i++ ) { 1585 n = ii[1] - ii[0]; ii++; 1586 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1587 x4 = xb[3]; x5 = xb[4]; 1588 ib = idx + ai[i]; 1589 for ( j=0; j<n; j++ ) { 1590 rval = ib[j]*5; 1591 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1592 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1593 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1594 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1595 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1596 v += 25; 1597 } 1598 } 1599 break; 1600 /* block sizes larger then 5 by 5 are handled by BLAS */ 1601 default: { 1602 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1603 if (!a->mult_work) { 1604 k = PetscMax(a->m,a->n); 1605 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1606 CHKPTRQ(a->mult_work); 1607 } 1608 work = a->mult_work; 1609 for ( i=0; i<mbs; i++ ) { 1610 n = ii[1] - ii[0]; ii++; 1611 ncols = n*bs; 1612 PetscMemzero(work,ncols*sizeof(Scalar)); 1613 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1614 v += n*bs2; 1615 x += bs; 1616 workt = work; 1617 for ( j=0; j<n; j++ ) { 1618 zb = z + bs*(*idx++); 1619 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1620 workt += bs; 1621 } 1622 } 1623 } 1624 } 1625 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1626 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1627 PLogFlops(2*a->nz*a->bs2); 1628 return 0; 1629 } 1630 1631 #undef __FUNC__ 1632 #define __FUNC__ "MatGetInfo_SeqBAIJ" /* ADIC Ignore */ 1633 int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info) 1634 { 1635 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1636 1637 info->rows_global = (double)a->m; 1638 info->columns_global = (double)a->n; 1639 info->rows_local = (double)a->m; 1640 info->columns_local = (double)a->n; 1641 info->block_size = a->bs2; 1642 info->nz_allocated = a->maxnz; 1643 info->nz_used = a->bs2*a->nz; 1644 info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 1645 /* if (info->nz_unneeded != A->info.nz_unneeded) 1646 printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */ 1647 info->assemblies = A->num_ass; 1648 info->mallocs = a->reallocs; 1649 info->memory = A->mem; 1650 if (A->factor) { 1651 info->fill_ratio_given = A->info.fill_ratio_given; 1652 info->fill_ratio_needed = A->info.fill_ratio_needed; 1653 info->factor_mallocs = A->info.factor_mallocs; 1654 } else { 1655 info->fill_ratio_given = 0; 1656 info->fill_ratio_needed = 0; 1657 info->factor_mallocs = 0; 1658 } 1659 return 0; 1660 } 1661 1662 #undef __FUNC__ 1663 #define __FUNC__ "MatEqual_SeqBAIJ" 1664 int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 1665 { 1666 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 1667 1668 if (B->type !=MATSEQBAIJ)SETERRQ(1,0,"Matrices must be same type"); 1669 1670 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1671 if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 1672 (a->nz != b->nz)) { 1673 *flg = PETSC_FALSE; return 0; 1674 } 1675 1676 /* if the a->i are the same */ 1677 if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 1678 *flg = PETSC_FALSE; return 0; 1679 } 1680 1681 /* if a->j are the same */ 1682 if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 1683 *flg = PETSC_FALSE; return 0; 1684 } 1685 1686 /* if a->a are the same */ 1687 if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 1688 *flg = PETSC_FALSE; return 0; 1689 } 1690 *flg = PETSC_TRUE; 1691 return 0; 1692 1693 } 1694 1695 #undef __FUNC__ 1696 #define __FUNC__ "MatGetDiagonal_SeqBAIJ" 1697 int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 1698 { 1699 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1700 int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 1701 Scalar *x, zero = 0.0,*aa,*aa_j; 1702 1703 if (A->factor) SETERRQ(1,0,"Not for factored matrix"); 1704 bs = a->bs; 1705 aa = a->a; 1706 ai = a->i; 1707 aj = a->j; 1708 ambs = a->mbs; 1709 bs2 = a->bs2; 1710 1711 VecSet(&zero,v); 1712 VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n); 1713 if (n != a->m) SETERRQ(1,0,"Nonconforming matrix and vector"); 1714 for ( i=0; i<ambs; i++ ) { 1715 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1716 if (aj[j] == i) { 1717 row = i*bs; 1718 aa_j = aa+j*bs2; 1719 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 1720 break; 1721 } 1722 } 1723 } 1724 return 0; 1725 } 1726 1727 #undef __FUNC__ 1728 #define __FUNC__ "MatDiagonalScale_SeqBAIJ" 1729 int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 1730 { 1731 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1732 Scalar *l,*r,x,*v,*aa,*li,*ri; 1733 int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 1734 1735 ai = a->i; 1736 aj = a->j; 1737 aa = a->a; 1738 m = a->m; 1739 n = a->n; 1740 bs = a->bs; 1741 mbs = a->mbs; 1742 bs2 = a->bs2; 1743 if (ll) { 1744 VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm); 1745 if (lm != m) SETERRQ(1,0,"Left scaling vector wrong length"); 1746 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1747 M = ai[i+1] - ai[i]; 1748 li = l + i*bs; 1749 v = aa + bs2*ai[i]; 1750 for ( j=0; j<M; j++ ) { /* for each block */ 1751 for ( k=0; k<bs2; k++ ) { 1752 (*v++) *= li[k%bs]; 1753 } 1754 } 1755 } 1756 } 1757 1758 if (rr) { 1759 VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn); 1760 if (rn != n) SETERRQ(1,0,"Right scaling vector wrong length"); 1761 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1762 M = ai[i+1] - ai[i]; 1763 v = aa + bs2*ai[i]; 1764 for ( j=0; j<M; j++ ) { /* for each block */ 1765 ri = r + bs*aj[ai[i]+j]; 1766 for ( k=0; k<bs; k++ ) { 1767 x = ri[k]; 1768 for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 1769 } 1770 } 1771 } 1772 } 1773 return 0; 1774 } 1775 1776 1777 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 1778 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 1779 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1780 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*); 1781 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**); 1782 1783 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 1784 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 1785 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 1786 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 1787 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 1788 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 1789 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec); 1790 1791 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 1792 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 1793 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 1794 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 1795 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 1796 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 1797 1798 #undef __FUNC__ 1799 #define __FUNC__ "MatNorm_SeqBAIJ" 1800 int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 1801 { 1802 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1803 Scalar *v = a->a; 1804 double sum = 0.0; 1805 int i,nz=a->nz,bs2=a->bs2; 1806 1807 if (type == NORM_FROBENIUS) { 1808 for (i=0; i< bs2*nz; i++ ) { 1809 #if defined(PETSC_COMPLEX) 1810 sum += real(conj(*v)*(*v)); v++; 1811 #else 1812 sum += (*v)*(*v); v++; 1813 #endif 1814 } 1815 *norm = sqrt(sum); 1816 } 1817 else { 1818 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 1819 } 1820 return 0; 1821 } 1822 1823 /* 1824 note: This can only work for identity for row and col. It would 1825 be good to check this and otherwise generate an error. 1826 */ 1827 #undef __FUNC__ 1828 #define __FUNC__ "MatILUFactor_SeqBAIJ" 1829 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 1830 { 1831 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1832 Mat outA; 1833 int ierr; 1834 1835 if (fill != 0) SETERRQ(1,0,"Only fill=0 supported"); 1836 1837 outA = inA; 1838 inA->factor = FACTOR_LU; 1839 a->row = row; 1840 a->col = col; 1841 1842 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1843 1844 if (!a->diag) { 1845 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 1846 } 1847 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 1848 return 0; 1849 } 1850 1851 #undef __FUNC__ 1852 #define __FUNC__ "MatScale_SeqBAIJ" 1853 int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 1854 { 1855 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1856 int one = 1, totalnz = a->bs2*a->nz; 1857 BLscal_( &totalnz, alpha, a->a, &one ); 1858 PLogFlops(totalnz); 1859 return 0; 1860 } 1861 1862 #undef __FUNC__ 1863 #define __FUNC__ "MatGetValues_SeqBAIJ" 1864 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 1865 { 1866 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1867 int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 1868 int *ai = a->i, *ailen = a->ilen; 1869 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 1870 Scalar *ap, *aa = a->a, zero = 0.0; 1871 1872 for ( k=0; k<m; k++ ) { /* loop over rows */ 1873 row = im[k]; brow = row/bs; 1874 if (row < 0) SETERRQ(1,0,"Negative row"); 1875 if (row >= a->m) SETERRQ(1,0,"Row too large"); 1876 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1877 nrow = ailen[brow]; 1878 for ( l=0; l<n; l++ ) { /* loop over columns */ 1879 if (in[l] < 0) SETERRQ(1,0,"Negative column"); 1880 if (in[l] >= a->n) SETERRQ(1,0,"Column too large"); 1881 col = in[l] ; 1882 bcol = col/bs; 1883 cidx = col%bs; 1884 ridx = row%bs; 1885 high = nrow; 1886 low = 0; /* assume unsorted */ 1887 while (high-low > 5) { 1888 t = (low+high)/2; 1889 if (rp[t] > bcol) high = t; 1890 else low = t; 1891 } 1892 for ( i=low; i<high; i++ ) { 1893 if (rp[i] > bcol) break; 1894 if (rp[i] == bcol) { 1895 *v++ = ap[bs2*i+bs*cidx+ridx]; 1896 goto finished; 1897 } 1898 } 1899 *v++ = zero; 1900 finished:; 1901 } 1902 } 1903 return 0; 1904 } 1905 1906 #undef __FUNC__ 1907 #define __FUNC__ "MatGetBlockSize_SeqBAIJ" /* ADIC Ignore */ 1908 int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs) 1909 { 1910 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data; 1911 *bs = baij->bs; 1912 return 0; 1913 } 1914 1915 /* idx should be of length atlease bs */ 1916 #undef __FUNC__ 1917 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Block" 1918 static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg) 1919 { 1920 int i,row; 1921 row = idx[0]; 1922 if (row%bs!=0) { *flg = PETSC_FALSE; return 0; } 1923 1924 for ( i=1; i<bs; i++ ) { 1925 if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; } 1926 } 1927 *flg = PETSC_TRUE; 1928 return 0; 1929 } 1930 1931 #undef __FUNC__ 1932 #define __FUNC__ "MatZeroRows_SeqBAIJ" 1933 int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag) 1934 { 1935 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1936 IS is_local; 1937 int ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2; 1938 PetscTruth flg; 1939 Scalar zero = 0.0,*aa; 1940 1941 /* Make a copy of the IS and sort it */ 1942 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 1943 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1944 ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr); 1945 ierr = ISSort(is_local); CHKERRQ(ierr); 1946 ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr); 1947 1948 i = 0; 1949 while (i < is_n) { 1950 if (rows[i]<0 || rows[i]>m) SETERRQ(1,0,"row out of range"); 1951 flg = PETSC_FALSE; 1952 if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); } 1953 if (flg) { /* There exists a block of rows to be Zerowed */ 1954 baij->ilen[rows[i]/bs] = 0; 1955 i += bs; 1956 } else { /* Zero out only the requested row */ 1957 count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs; 1958 aa = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs); 1959 for ( j=0; j<count; j++ ) { 1960 aa[0] = zero; 1961 aa+=bs; 1962 } 1963 i++; 1964 } 1965 } 1966 if (diag) { 1967 for ( j=0; j<is_n; j++ ) { 1968 ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1969 } 1970 } 1971 ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr); 1972 ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr); 1973 ierr = ISDestroy(is_local); CHKERRQ(ierr); 1974 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1975 1976 return 0; 1977 } 1978 1979 #undef __FUNC__ 1980 #define __FUNC__ "MatPrintHelp_SeqBAIJ" /* ADIC Ignore */ 1981 int MatPrintHelp_SeqBAIJ(Mat A) 1982 { 1983 static int called = 0; 1984 MPI_Comm comm = A->comm; 1985 1986 if (called) return 0; else called = 1; 1987 PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n"); 1988 PetscPrintf(comm," -mat_block_size <block_size>\n"); 1989 return 0; 1990 } 1991 1992 /* -------------------------------------------------------------------*/ 1993 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 1994 MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 1995 MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N, 1996 MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ, 1997 MatSolve_SeqBAIJ_N,0, 1998 0,0, 1999 MatLUFactor_SeqBAIJ,0, 2000 0, 2001 MatTranspose_SeqBAIJ, 2002 MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 2003 MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 2004 0,MatAssemblyEnd_SeqBAIJ, 2005 0, 2006 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ, 2007 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 2008 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 2009 MatILUFactorSymbolic_SeqBAIJ,0, 2010 0,0, 2011 MatConvertSameType_SeqBAIJ,0,0, 2012 MatILUFactor_SeqBAIJ,0,0, 2013 MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ, 2014 MatGetValues_SeqBAIJ,0, 2015 MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 2016 0,0,0,MatGetBlockSize_SeqBAIJ, 2017 MatGetRowIJ_SeqBAIJ, 2018 MatRestoreRowIJ_SeqBAIJ, 2019 0,0,0,0,0,0, 2020 MatSetValuesBlocked_SeqBAIJ}; 2021 2022 #undef __FUNC__ 2023 #define __FUNC__ "MatCreateSeqBAIJ" 2024 /*@C 2025 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2026 compressed row) format. For good matrix assembly performance the 2027 user should preallocate the matrix storage by setting the parameter nz 2028 (or the array nzz). By setting these parameters accurately, performance 2029 during matrix assembly can be increased by more than a factor of 50. 2030 2031 Input Parameters: 2032 . comm - MPI communicator, set to PETSC_COMM_SELF 2033 . bs - size of block 2034 . m - number of rows 2035 . n - number of columns 2036 . nz - number of block nonzeros per block row (same for all rows) 2037 . nzz - array containing the number of block nonzeros in the various block rows 2038 (possibly different for each block row) or PETSC_NULL 2039 2040 Output Parameter: 2041 . A - the matrix 2042 2043 Options Database Keys: 2044 $ -mat_no_unroll - uses code that does not unroll the loops in the 2045 $ block calculations (much solver) 2046 $ -mat_block_size - size of the blocks to use 2047 2048 Notes: 2049 The block AIJ format is fully compatible with standard Fortran 77 2050 storage. That is, the stored row and column indices can begin at 2051 either one (as in Fortran) or zero. See the users' manual for details. 2052 2053 Specify the preallocated storage with either nz or nnz (not both). 2054 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2055 allocation. For additional details, see the users manual chapter on 2056 matrices. 2057 2058 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2059 @*/ 2060 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 2061 { 2062 Mat B; 2063 Mat_SeqBAIJ *b; 2064 int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size; 2065 2066 MPI_Comm_size(comm,&size); 2067 if (size > 1) SETERRQ(1,0,"Comm must be of size 1"); 2068 2069 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr); 2070 2071 if (mbs*bs!=m || nbs*bs!=n) 2072 SETERRQ(1,0,"Number rows, cols must be divisible by blocksize"); 2073 2074 *A = 0; 2075 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 2076 PLogObjectCreate(B); 2077 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 2078 PetscMemzero(b,sizeof(Mat_SeqBAIJ)); 2079 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 2080 ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr); 2081 if (!flg) { 2082 switch (bs) { 2083 case 1: 2084 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2085 B->ops.solve = MatSolve_SeqBAIJ_1; 2086 B->ops.mult = MatMult_SeqBAIJ_1; 2087 B->ops.multadd = MatMultAdd_SeqBAIJ_1; 2088 break; 2089 case 2: 2090 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2091 B->ops.solve = MatSolve_SeqBAIJ_2; 2092 B->ops.mult = MatMult_SeqBAIJ_2; 2093 B->ops.multadd = MatMultAdd_SeqBAIJ_2; 2094 break; 2095 case 3: 2096 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2097 B->ops.solve = MatSolve_SeqBAIJ_3; 2098 B->ops.mult = MatMult_SeqBAIJ_3; 2099 B->ops.multadd = MatMultAdd_SeqBAIJ_3; 2100 break; 2101 case 4: 2102 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2103 B->ops.solve = MatSolve_SeqBAIJ_4; 2104 B->ops.mult = MatMult_SeqBAIJ_4; 2105 B->ops.multadd = MatMultAdd_SeqBAIJ_4; 2106 break; 2107 case 5: 2108 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2109 B->ops.solve = MatSolve_SeqBAIJ_5; 2110 B->ops.mult = MatMult_SeqBAIJ_5; 2111 B->ops.multadd = MatMultAdd_SeqBAIJ_5; 2112 break; 2113 case 7: 2114 B->ops.mult = MatMult_SeqBAIJ_7; 2115 B->ops.solve = MatSolve_SeqBAIJ_7; 2116 B->ops.multadd = MatMultAdd_SeqBAIJ_7; 2117 break; 2118 } 2119 } 2120 B->destroy = MatDestroy_SeqBAIJ; 2121 B->view = MatView_SeqBAIJ; 2122 B->factor = 0; 2123 B->lupivotthreshold = 1.0; 2124 B->mapping = 0; 2125 b->row = 0; 2126 b->col = 0; 2127 b->reallocs = 0; 2128 2129 b->m = m; B->m = m; B->M = m; 2130 b->n = n; B->n = n; B->N = n; 2131 b->mbs = mbs; 2132 b->nbs = nbs; 2133 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 2134 if (nnz == PETSC_NULL) { 2135 if (nz == PETSC_DEFAULT) nz = 5; 2136 else if (nz <= 0) nz = 1; 2137 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 2138 nz = nz*mbs; 2139 } 2140 else { 2141 nz = 0; 2142 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 2143 } 2144 2145 /* allocate the matrix space */ 2146 len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 2147 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 2148 PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 2149 b->j = (int *) (b->a + nz*bs2); 2150 PetscMemzero(b->j,nz*sizeof(int)); 2151 b->i = b->j + nz; 2152 b->singlemalloc = 1; 2153 2154 b->i[0] = 0; 2155 for (i=1; i<mbs+1; i++) { 2156 b->i[i] = b->i[i-1] + b->imax[i-1]; 2157 } 2158 2159 /* b->ilen will count nonzeros in each block row so far. */ 2160 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 2161 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 2162 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 2163 2164 b->bs = bs; 2165 b->bs2 = bs2; 2166 b->mbs = mbs; 2167 b->nz = 0; 2168 b->maxnz = nz*bs2; 2169 b->sorted = 0; 2170 b->roworiented = 1; 2171 b->nonew = 0; 2172 b->diag = 0; 2173 b->solve_work = 0; 2174 b->mult_work = 0; 2175 b->spptr = 0; 2176 B->info.nz_unneeded = (double)b->maxnz; 2177 2178 *A = B; 2179 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 2180 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 2181 return 0; 2182 } 2183 2184 #undef __FUNC__ 2185 #define __FUNC__ "MatConvertSameType_SeqBAIJ" 2186 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 2187 { 2188 Mat C; 2189 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 2190 int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 2191 2192 if (a->i[mbs] != nz) SETERRQ(1,0,"Corrupt matrix"); 2193 2194 *B = 0; 2195 PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 2196 PLogObjectCreate(C); 2197 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 2198 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 2199 C->destroy = MatDestroy_SeqBAIJ; 2200 C->view = MatView_SeqBAIJ; 2201 C->factor = A->factor; 2202 c->row = 0; 2203 c->col = 0; 2204 C->assembled = PETSC_TRUE; 2205 2206 c->m = C->m = a->m; 2207 c->n = C->n = a->n; 2208 C->M = a->m; 2209 C->N = a->n; 2210 2211 c->bs = a->bs; 2212 c->bs2 = a->bs2; 2213 c->mbs = a->mbs; 2214 c->nbs = a->nbs; 2215 2216 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 2217 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 2218 for ( i=0; i<mbs; i++ ) { 2219 c->imax[i] = a->imax[i]; 2220 c->ilen[i] = a->ilen[i]; 2221 } 2222 2223 /* allocate the matrix space */ 2224 c->singlemalloc = 1; 2225 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 2226 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 2227 c->j = (int *) (c->a + nz*bs2); 2228 c->i = c->j + nz; 2229 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 2230 if (mbs > 0) { 2231 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 2232 if (cpvalues == COPY_VALUES) { 2233 PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 2234 } 2235 } 2236 2237 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 2238 c->sorted = a->sorted; 2239 c->roworiented = a->roworiented; 2240 c->nonew = a->nonew; 2241 2242 if (a->diag) { 2243 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 2244 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 2245 for ( i=0; i<mbs; i++ ) { 2246 c->diag[i] = a->diag[i]; 2247 } 2248 } 2249 else c->diag = 0; 2250 c->nz = a->nz; 2251 c->maxnz = a->maxnz; 2252 c->solve_work = 0; 2253 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 2254 c->mult_work = 0; 2255 *B = C; 2256 return 0; 2257 } 2258 2259 #undef __FUNC__ 2260 #define __FUNC__ "MatLoad_SeqBAIJ" 2261 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 2262 { 2263 Mat_SeqBAIJ *a; 2264 Mat B; 2265 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 2266 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2267 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 2268 int *masked, nmask,tmp,bs2,ishift; 2269 Scalar *aa; 2270 MPI_Comm comm = ((PetscObject) viewer)->comm; 2271 2272 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2273 bs2 = bs*bs; 2274 2275 MPI_Comm_size(comm,&size); 2276 if (size > 1) SETERRQ(1,0,"view must have one processor"); 2277 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2278 ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 2279 if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not Mat object"); 2280 M = header[1]; N = header[2]; nz = header[3]; 2281 2282 if (M != N) SETERRQ(1,0,"Can only do square matrices"); 2283 2284 /* 2285 This code adds extra rows to make sure the number of rows is 2286 divisible by the blocksize 2287 */ 2288 mbs = M/bs; 2289 extra_rows = bs - M + bs*(mbs); 2290 if (extra_rows == bs) extra_rows = 0; 2291 else mbs++; 2292 if (extra_rows) { 2293 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 2294 } 2295 2296 /* read in row lengths */ 2297 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 2298 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 2299 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 2300 2301 /* read in column indices */ 2302 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 2303 ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 2304 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 2305 2306 /* loop over row lengths determining block row lengths */ 2307 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 2308 PetscMemzero(browlengths,mbs*sizeof(int)); 2309 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 2310 PetscMemzero(mask,mbs*sizeof(int)); 2311 masked = mask + mbs; 2312 rowcount = 0; nzcount = 0; 2313 for ( i=0; i<mbs; i++ ) { 2314 nmask = 0; 2315 for ( j=0; j<bs; j++ ) { 2316 kmax = rowlengths[rowcount]; 2317 for ( k=0; k<kmax; k++ ) { 2318 tmp = jj[nzcount++]/bs; 2319 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2320 } 2321 rowcount++; 2322 } 2323 browlengths[i] += nmask; 2324 /* zero out the mask elements we set */ 2325 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2326 } 2327 2328 /* create our matrix */ 2329 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 2330 CHKERRQ(ierr); 2331 B = *A; 2332 a = (Mat_SeqBAIJ *) B->data; 2333 2334 /* set matrix "i" values */ 2335 a->i[0] = 0; 2336 for ( i=1; i<= mbs; i++ ) { 2337 a->i[i] = a->i[i-1] + browlengths[i-1]; 2338 a->ilen[i-1] = browlengths[i-1]; 2339 } 2340 a->nz = 0; 2341 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 2342 2343 /* read in nonzero values */ 2344 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 2345 ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 2346 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 2347 2348 /* set "a" and "j" values into matrix */ 2349 nzcount = 0; jcount = 0; 2350 for ( i=0; i<mbs; i++ ) { 2351 nzcountb = nzcount; 2352 nmask = 0; 2353 for ( j=0; j<bs; j++ ) { 2354 kmax = rowlengths[i*bs+j]; 2355 for ( k=0; k<kmax; k++ ) { 2356 tmp = jj[nzcount++]/bs; 2357 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2358 } 2359 rowcount++; 2360 } 2361 /* sort the masked values */ 2362 PetscSortInt(nmask,masked); 2363 2364 /* set "j" values into matrix */ 2365 maskcount = 1; 2366 for ( j=0; j<nmask; j++ ) { 2367 a->j[jcount++] = masked[j]; 2368 mask[masked[j]] = maskcount++; 2369 } 2370 /* set "a" values into matrix */ 2371 ishift = bs2*a->i[i]; 2372 for ( j=0; j<bs; j++ ) { 2373 kmax = rowlengths[i*bs+j]; 2374 for ( k=0; k<kmax; k++ ) { 2375 tmp = jj[nzcountb]/bs ; 2376 block = mask[tmp] - 1; 2377 point = jj[nzcountb] - bs*tmp; 2378 idx = ishift + bs2*block + j + bs*point; 2379 a->a[idx] = aa[nzcountb++]; 2380 } 2381 } 2382 /* zero out the mask elements we set */ 2383 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2384 } 2385 if (jcount != a->nz) SETERRQ(1,0,"Error bad binary matrix"); 2386 2387 PetscFree(rowlengths); 2388 PetscFree(browlengths); 2389 PetscFree(aa); 2390 PetscFree(jj); 2391 PetscFree(mask); 2392 2393 B->assembled = PETSC_TRUE; 2394 2395 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 2396 if (flg) { 2397 Viewer tviewer; 2398 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2399 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO,0);CHKERRQ(ierr); 2400 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2401 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2402 } 2403 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 2404 if (flg) { 2405 Viewer tviewer; 2406 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2407 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO_LONG,0);CHKERRQ(ierr); 2408 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2409 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2410 } 2411 ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 2412 if (flg) { 2413 Viewer tviewer; 2414 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2415 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2416 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2417 } 2418 ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 2419 if (flg) { 2420 Viewer tviewer; 2421 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2422 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_MATLAB,"M");CHKERRQ(ierr); 2423 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2424 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2425 } 2426 ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 2427 if (flg) { 2428 Viewer tviewer; 2429 ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 2430 ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 2431 ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 2432 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2433 } 2434 return 0; 2435 } 2436 2437 2438 2439