1 #ifndef lint 2 static char vcid[] = "$Id: baij.c,v 1.102 1997/05/23 18:38:09 balay Exp balay $"; 3 #endif 4 5 /* 6 Defines the basic matrix operations for the BAIJ (compressed row) 7 matrix storage format. 8 */ 9 10 #include "pinclude/pviewer.h" 11 #include "sys.h" 12 #include "src/mat/impls/baij/seq/baij.h" 13 #include "src/vec/vecimpl.h" 14 #include "src/inline/spops.h" 15 #include "petsc.h" 16 17 18 /* 19 Adds diagonal pointers to sparse matrix structure. 20 */ 21 22 #undef __FUNC__ 23 #define __FUNC__ "MatMarkDiag_SeqBAIJ" 24 int MatMarkDiag_SeqBAIJ(Mat A) 25 { 26 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 27 int i,j, *diag, m = a->mbs; 28 29 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 30 PLogObjectMemory(A,(m+1)*sizeof(int)); 31 for ( i=0; i<m; i++ ) { 32 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 33 if (a->j[j] == i) { 34 diag[i] = j; 35 break; 36 } 37 } 38 } 39 a->diag = diag; 40 return 0; 41 } 42 43 44 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 45 46 #undef __FUNC__ 47 #define __FUNC__ "MatGetRowIJ_SeqBAIJ" 48 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 49 PetscTruth *done) 50 { 51 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 52 int ierr, n = a->mbs,i; 53 54 *nn = n; 55 if (!ia) return 0; 56 if (symmetric) { 57 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 58 } else if (oshift == 1) { 59 /* temporarily add 1 to i and j indices */ 60 int nz = a->i[n] + 1; 61 for ( i=0; i<nz; i++ ) a->j[i]++; 62 for ( i=0; i<n+1; i++ ) a->i[i]++; 63 *ia = a->i; *ja = a->j; 64 } else { 65 *ia = a->i; *ja = a->j; 66 } 67 68 return 0; 69 } 70 71 #undef __FUNC__ 72 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ" /* ADIC Ignore */ 73 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 74 PetscTruth *done) 75 { 76 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 77 int i,n = a->mbs; 78 79 if (!ia) return 0; 80 if (symmetric) { 81 PetscFree(*ia); 82 PetscFree(*ja); 83 } else if (oshift == 1) { 84 int nz = a->i[n]; 85 for ( i=0; i<nz; i++ ) a->j[i]--; 86 for ( i=0; i<n+1; i++ ) a->i[i]--; 87 } 88 return 0; 89 } 90 91 92 #undef __FUNC__ 93 #define __FUNC__ "MatView_SeqBAIJ_Binary" /* ADIC Ignore */ 94 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 95 { 96 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 97 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 98 Scalar *aa; 99 100 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 101 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 102 col_lens[0] = MAT_COOKIE; 103 104 col_lens[1] = a->m; 105 col_lens[2] = a->n; 106 col_lens[3] = a->nz*bs2; 107 108 /* store lengths of each row and write (including header) to file */ 109 count = 0; 110 for ( i=0; i<a->mbs; i++ ) { 111 for ( j=0; j<bs; j++ ) { 112 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 113 } 114 } 115 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 116 PetscFree(col_lens); 117 118 /* store column indices (zero start index) */ 119 jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) ); CHKPTRQ(jj); 120 count = 0; 121 for ( i=0; i<a->mbs; i++ ) { 122 for ( j=0; j<bs; j++ ) { 123 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 124 for ( l=0; l<bs; l++ ) { 125 jj[count++] = bs*a->j[k] + l; 126 } 127 } 128 } 129 } 130 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr); 131 PetscFree(jj); 132 133 /* store nonzero values */ 134 aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar)); CHKPTRQ(aa); 135 count = 0; 136 for ( i=0; i<a->mbs; i++ ) { 137 for ( j=0; j<bs; j++ ) { 138 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 139 for ( l=0; l<bs; l++ ) { 140 aa[count++] = a->a[bs2*k + l*bs + j]; 141 } 142 } 143 } 144 } 145 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 146 PetscFree(aa); 147 return 0; 148 } 149 150 #undef __FUNC__ 151 #define __FUNC__ "MatView_SeqBAIJ_ASCII" /* ADIC Ignore */ 152 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 153 { 154 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 155 int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 156 FILE *fd; 157 char *outputname; 158 159 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 160 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 161 ierr = ViewerGetFormat(viewer,&format); 162 if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) { 163 fprintf(fd," block size is %d\n",bs); 164 } 165 else if (format == VIEWER_FORMAT_ASCII_MATLAB) { 166 SETERRQ(1,0,"Matlab format not supported"); 167 } 168 else if (format == VIEWER_FORMAT_ASCII_COMMON) { 169 for ( i=0; i<a->mbs; i++ ) { 170 for ( j=0; j<bs; j++ ) { 171 fprintf(fd,"row %d:",i*bs+j); 172 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 173 for ( l=0; l<bs; l++ ) { 174 #if defined(PETSC_COMPLEX) 175 if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 176 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 177 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 178 else if (real(a->a[bs2*k + l*bs + j]) != 0.0) 179 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 180 #else 181 if (a->a[bs2*k + l*bs + j] != 0.0) 182 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 183 #endif 184 } 185 } 186 fprintf(fd,"\n"); 187 } 188 } 189 } 190 else { 191 for ( i=0; i<a->mbs; i++ ) { 192 for ( j=0; j<bs; j++ ) { 193 fprintf(fd,"row %d:",i*bs+j); 194 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 195 for ( l=0; l<bs; l++ ) { 196 #if defined(PETSC_COMPLEX) 197 if (imag(a->a[bs2*k + l*bs + j]) != 0.0) { 198 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 199 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 200 } 201 else { 202 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 203 } 204 #else 205 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 206 #endif 207 } 208 } 209 fprintf(fd,"\n"); 210 } 211 } 212 } 213 fflush(fd); 214 return 0; 215 } 216 217 #undef __FUNC__ 218 #define __FUNC__ "MatView_SeqBAIJ_Draw" /* ADIC Ignore */ 219 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer) 220 { 221 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 222 int row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2; 223 double xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r; 224 Scalar *aa; 225 Draw draw; 226 DrawButton button; 227 PetscTruth isnull; 228 229 ViewerDrawGetDraw(viewer,&draw); 230 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 231 232 xr = a->n; yr = a->m; h = yr/10.0; w = xr/10.0; 233 xr += w; yr += h; xl = -w; yl = -h; 234 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 235 /* loop over matrix elements drawing boxes */ 236 color = DRAW_BLUE; 237 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 238 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 239 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 240 x_l = a->j[j]*bs; x_r = x_l + 1.0; 241 aa = a->a + j*bs2; 242 for ( k=0; k<bs; k++ ) { 243 for ( l=0; l<bs; l++ ) { 244 if (PetscReal(*aa++) >= 0.) continue; 245 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 246 } 247 } 248 } 249 } 250 color = DRAW_CYAN; 251 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 252 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 253 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 254 x_l = a->j[j]*bs; x_r = x_l + 1.0; 255 aa = a->a + j*bs2; 256 for ( k=0; k<bs; k++ ) { 257 for ( l=0; l<bs; l++ ) { 258 if (PetscReal(*aa++) != 0.) continue; 259 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 260 } 261 } 262 } 263 } 264 265 color = DRAW_RED; 266 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 267 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 268 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 269 x_l = a->j[j]*bs; x_r = x_l + 1.0; 270 aa = a->a + j*bs2; 271 for ( k=0; k<bs; k++ ) { 272 for ( l=0; l<bs; l++ ) { 273 if (PetscReal(*aa++) <= 0.) continue; 274 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 275 } 276 } 277 } 278 } 279 280 DrawFlush(draw); 281 DrawGetPause(draw,&pause); 282 if (pause >= 0) { PetscSleep(pause); return 0;} 283 284 /* allow the matrix to zoom or shrink */ 285 ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 286 while (button != BUTTON_RIGHT) { 287 DrawClear(draw); 288 if (button == BUTTON_LEFT) scale = .5; 289 else if (button == BUTTON_CENTER) scale = 2.; 290 xl = scale*(xl + w - xc) + xc - w*scale; 291 xr = scale*(xr - w - xc) + xc + w*scale; 292 yl = scale*(yl + h - yc) + yc - h*scale; 293 yr = scale*(yr - h - yc) + yc + h*scale; 294 w *= scale; h *= scale; 295 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 296 color = DRAW_BLUE; 297 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 298 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 299 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 300 x_l = a->j[j]*bs; x_r = x_l + 1.0; 301 aa = a->a + j*bs2; 302 for ( k=0; k<bs; k++ ) { 303 for ( l=0; l<bs; l++ ) { 304 if (PetscReal(*aa++) >= 0.) continue; 305 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 306 } 307 } 308 } 309 } 310 color = DRAW_CYAN; 311 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 312 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 313 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 314 x_l = a->j[j]*bs; x_r = x_l + 1.0; 315 aa = a->a + j*bs2; 316 for ( k=0; k<bs; k++ ) { 317 for ( l=0; l<bs; l++ ) { 318 if (PetscReal(*aa++) != 0.) continue; 319 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 320 } 321 } 322 } 323 } 324 325 color = DRAW_RED; 326 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 327 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 328 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 329 x_l = a->j[j]*bs; x_r = x_l + 1.0; 330 aa = a->a + j*bs2; 331 for ( k=0; k<bs; k++ ) { 332 for ( l=0; l<bs; l++ ) { 333 if (PetscReal(*aa++) <= 0.) continue; 334 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 335 } 336 } 337 } 338 } 339 ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 340 } 341 return 0; 342 } 343 344 #undef __FUNC__ 345 #define __FUNC__ "MatView_SeqBAIJ" /* ADIC Ignore */ 346 int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 347 { 348 Mat A = (Mat) obj; 349 ViewerType vtype; 350 int ierr; 351 352 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 353 if (vtype == MATLAB_VIEWER) { 354 SETERRQ(1,0,"Matlab viewer not supported"); 355 } 356 else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 357 return MatView_SeqBAIJ_ASCII(A,viewer); 358 } 359 else if (vtype == BINARY_FILE_VIEWER) { 360 return MatView_SeqBAIJ_Binary(A,viewer); 361 } 362 else if (vtype == DRAW_VIEWER) { 363 return MatView_SeqBAIJ_Draw(A,viewer); 364 } 365 return 0; 366 } 367 368 #define CHUNKSIZE 10 369 370 #undef __FUNC__ 371 #define __FUNC__ "MatSetValues_SeqBAIJ" 372 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 373 { 374 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 375 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 376 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 377 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 378 int ridx,cidx,bs2=a->bs2; 379 Scalar *ap,value,*aa=a->a,*bap; 380 381 for ( k=0; k<m; k++ ) { /* loop over added rows */ 382 row = im[k]; brow = row/bs; 383 #if defined(PETSC_BOPT_g) 384 if (row < 0) SETERRQ(1,0,"Negative row"); 385 if (row >= a->m) SETERRQ(1,0,"Row too large"); 386 #endif 387 rp = aj + ai[brow]; 388 ap = aa + bs2*ai[brow]; 389 rmax = imax[brow]; 390 nrow = ailen[brow]; 391 low = 0; 392 for ( l=0; l<n; l++ ) { /* loop over added columns */ 393 #if defined(PETSC_BOPT_g) 394 if (in[l] < 0) SETERRQ(1,0,"Negative column"); 395 if (in[l] >= a->n) SETERRQ(1,0,"Column too large"); 396 #endif 397 col = in[l]; bcol = col/bs; 398 ridx = row % bs; cidx = col % bs; 399 if (roworiented) { 400 value = *v++; 401 } 402 else { 403 value = v[k + l*m]; 404 } 405 if (!sorted) low = 0; high = nrow; 406 while (high-low > 7) { 407 t = (low+high)/2; 408 if (rp[t] > bcol) high = t; 409 else low = t; 410 } 411 for ( i=low; i<high; i++ ) { 412 if (rp[i] > bcol) break; 413 if (rp[i] == bcol) { 414 bap = ap + bs2*i + bs*cidx + ridx; 415 if (is == ADD_VALUES) *bap += value; 416 else *bap = value; 417 goto noinsert1; 418 } 419 } 420 if (nonew == 1) goto noinsert1; 421 else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 422 if (nrow >= rmax) { 423 /* there is no extra room in row, therefore enlarge */ 424 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 425 Scalar *new_a; 426 427 if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 428 429 /* Malloc new storage space */ 430 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 431 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 432 new_j = (int *) (new_a + bs2*new_nz); 433 new_i = new_j + new_nz; 434 435 /* copy over old data into new slots */ 436 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 437 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 438 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); 439 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 440 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, 441 len*sizeof(int)); 442 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); 443 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 444 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), 445 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); 446 /* free up old matrix storage */ 447 PetscFree(a->a); 448 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 449 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 450 a->singlemalloc = 1; 451 452 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 453 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 454 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 455 a->maxnz += bs2*CHUNKSIZE; 456 a->reallocs++; 457 a->nz++; 458 } 459 N = nrow++ - 1; 460 /* shift up all the later entries in this row */ 461 for ( ii=N; ii>=i; ii-- ) { 462 rp[ii+1] = rp[ii]; 463 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 464 } 465 if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 466 rp[i] = bcol; 467 ap[bs2*i + bs*cidx + ridx] = value; 468 noinsert1:; 469 low = i; 470 } 471 ailen[brow] = nrow; 472 } 473 return 0; 474 } 475 476 #undef __FUNC__ 477 #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ" 478 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 479 { 480 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 481 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 482 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 483 int roworiented=a->roworiented; 484 int *aj=a->j,nonew=a->nonew; 485 int bs2=a->bs2,bs=a->bs,stepval; 486 Scalar *ap,*value=v,*aa=a->a,*bap; 487 488 if (roworiented) { 489 stepval = (n-1)*bs; 490 } else { 491 stepval = (m-1)*bs; 492 } 493 for ( k=0; k<m; k++ ) { /* loop over added rows */ 494 row = im[k]; 495 #if defined(PETSC_BOPT_g) 496 if (row < 0) SETERRQ(1,0,"Negative row"); 497 if (row >= a->mbs) SETERRQ(1,0,"Row too large"); 498 #endif 499 rp = aj + ai[row]; 500 ap = aa + bs2*ai[row]; 501 rmax = imax[row]; 502 nrow = ailen[row]; 503 low = 0; 504 for ( l=0; l<n; l++ ) { /* loop over added columns */ 505 #if defined(PETSC_BOPT_g) 506 if (in[l] < 0) SETERRQ(1,0,"Negative column"); 507 if (in[l] >= a->nbs) SETERRQ(1,0,"Column too large"); 508 #endif 509 col = in[l]; 510 if (roworiented) { 511 value = v + k*(stepval+bs)*bs + l*bs; 512 } else { 513 value = v + l*(stepval+bs)*bs + k*bs; 514 } 515 if (!sorted) low = 0; high = nrow; 516 while (high-low > 7) { 517 t = (low+high)/2; 518 if (rp[t] > col) high = t; 519 else low = t; 520 } 521 for ( i=low; i<high; i++ ) { 522 if (rp[i] > col) break; 523 if (rp[i] == col) { 524 bap = ap + bs2*i; 525 if (roworiented) { 526 if (is == ADD_VALUES) { 527 for ( ii=0; ii<bs; ii++,value+=stepval ) 528 for (jj=ii; jj<bs2; jj+=bs ) 529 bap[jj] += *value++; 530 } else { 531 for ( ii=0; ii<bs; ii++,value+=stepval ) 532 for (jj=ii; jj<bs2; jj+=bs ) 533 bap[jj] = *value++; 534 } 535 } else { 536 if (is == ADD_VALUES) { 537 for ( ii=0; ii<bs; ii++,value+=stepval ) 538 for (jj=0; jj<bs; jj++ ) 539 *bap++ += *value++; 540 } else { 541 for ( ii=0; ii<bs; ii++,value+=stepval ) 542 for (jj=0; jj<bs; jj++ ) 543 *bap++ = *value++; 544 } 545 } 546 goto noinsert2; 547 } 548 } 549 if (nonew == 1) goto noinsert2; 550 else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 551 if (nrow >= rmax) { 552 /* there is no extra room in row, therefore enlarge */ 553 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 554 Scalar *new_a; 555 556 if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 557 558 /* malloc new storage space */ 559 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 560 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 561 new_j = (int *) (new_a + bs2*new_nz); 562 new_i = new_j + new_nz; 563 564 /* copy over old data into new slots */ 565 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} 566 for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 567 PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int)); 568 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 569 PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow, 570 len*sizeof(int)); 571 PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(Scalar)); 572 PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 573 PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE), 574 aa+bs2*(ai[row]+nrow),bs2*len*sizeof(Scalar)); 575 /* free up old matrix storage */ 576 PetscFree(a->a); 577 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 578 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 579 a->singlemalloc = 1; 580 581 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 582 rmax = imax[row] = imax[row] + CHUNKSIZE; 583 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 584 a->maxnz += bs2*CHUNKSIZE; 585 a->reallocs++; 586 a->nz++; 587 } 588 N = nrow++ - 1; 589 /* shift up all the later entries in this row */ 590 for ( ii=N; ii>=i; ii-- ) { 591 rp[ii+1] = rp[ii]; 592 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 593 } 594 if (N >= i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 595 rp[i] = col; 596 bap = ap + bs2*i; 597 if (roworiented) { 598 for ( ii=0; ii<bs; ii++,value+=stepval) 599 for (jj=ii; jj<bs2; jj+=bs ) 600 bap[jj] = *value++; 601 } else { 602 for ( ii=0; ii<bs; ii++,value+=stepval ) 603 for (jj=0; jj<bs; jj++ ) 604 *bap++ = *value++; 605 } 606 noinsert2:; 607 low = i; 608 } 609 ailen[row] = nrow; 610 } 611 return 0; 612 } 613 614 #undef __FUNC__ 615 #define __FUNC__ "MatGetSize_SeqBAIJ" /* ADIC Ignore */ 616 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 617 { 618 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 619 *m = a->m; *n = a->n; 620 return 0; 621 } 622 623 #undef __FUNC__ 624 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ" /* ADIC Ignore */ 625 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 626 { 627 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 628 *m = 0; *n = a->m; 629 return 0; 630 } 631 632 #undef __FUNC__ 633 #define __FUNC__ "MatGetRow_SeqBAIJ" 634 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 635 { 636 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 637 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 638 Scalar *aa,*v_i,*aa_i; 639 640 bs = a->bs; 641 ai = a->i; 642 aj = a->j; 643 aa = a->a; 644 bs2 = a->bs2; 645 646 if (row < 0 || row >= a->m) SETERRQ(1,0,"Row out of range"); 647 648 bn = row/bs; /* Block number */ 649 bp = row % bs; /* Block Position */ 650 M = ai[bn+1] - ai[bn]; 651 *nz = bs*M; 652 653 if (v) { 654 *v = 0; 655 if (*nz) { 656 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 657 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 658 v_i = *v + i*bs; 659 aa_i = aa + bs2*(ai[bn] + i); 660 for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 661 } 662 } 663 } 664 665 if (idx) { 666 *idx = 0; 667 if (*nz) { 668 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 669 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 670 idx_i = *idx + i*bs; 671 itmp = bs*aj[ai[bn] + i]; 672 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 673 } 674 } 675 } 676 return 0; 677 } 678 679 #undef __FUNC__ 680 #define __FUNC__ "MatRestoreRow_SeqBAIJ" /* ADIC Ignore */ 681 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 682 { 683 if (idx) {if (*idx) PetscFree(*idx);} 684 if (v) {if (*v) PetscFree(*v);} 685 return 0; 686 } 687 688 #undef __FUNC__ 689 #define __FUNC__ "MatTranspose_SeqBAIJ" 690 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 691 { 692 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 693 Mat C; 694 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 695 int *rows,*cols,bs2=a->bs2; 696 Scalar *array=a->a; 697 698 if (B==PETSC_NULL && mbs!=nbs) 699 SETERRQ(1,0,"Square matrix only for in-place"); 700 col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 701 PetscMemzero(col,(1+nbs)*sizeof(int)); 702 703 for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 704 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 705 PetscFree(col); 706 rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 707 cols = rows + bs; 708 for ( i=0; i<mbs; i++ ) { 709 cols[0] = i*bs; 710 for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 711 len = ai[i+1] - ai[i]; 712 for ( j=0; j<len; j++ ) { 713 rows[0] = (*aj++)*bs; 714 for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 715 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 716 array += bs2; 717 } 718 } 719 PetscFree(rows); 720 721 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 722 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 723 724 if (B != PETSC_NULL) { 725 *B = C; 726 } else { 727 /* This isn't really an in-place transpose */ 728 PetscFree(a->a); 729 if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 730 if (a->diag) PetscFree(a->diag); 731 if (a->ilen) PetscFree(a->ilen); 732 if (a->imax) PetscFree(a->imax); 733 if (a->solve_work) PetscFree(a->solve_work); 734 PetscFree(a); 735 PetscMemcpy(A,C,sizeof(struct _p_Mat)); 736 PetscHeaderDestroy(C); 737 } 738 return 0; 739 } 740 741 742 #undef __FUNC__ 743 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ" 744 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 745 { 746 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 747 int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 748 int m = a->m,*ip, N, *ailen = a->ilen; 749 int mbs = a->mbs, bs2 = a->bs2,rmax; 750 Scalar *aa = a->a, *ap; 751 752 if (mode == MAT_FLUSH_ASSEMBLY) return 0; 753 754 rmax = ailen[0]; 755 for ( i=1; i<mbs; i++ ) { 756 /* move each row back by the amount of empty slots (fshift) before it*/ 757 fshift += imax[i-1] - ailen[i-1]; 758 rmax = PetscMax(rmax,ailen[i]); 759 if (fshift) { 760 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 761 N = ailen[i]; 762 for ( j=0; j<N; j++ ) { 763 ip[j-fshift] = ip[j]; 764 PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 765 } 766 } 767 ai[i] = ai[i-1] + ailen[i-1]; 768 } 769 if (mbs) { 770 fshift += imax[mbs-1] - ailen[mbs-1]; 771 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 772 } 773 /* reset ilen and imax for each row */ 774 for ( i=0; i<mbs; i++ ) { 775 ailen[i] = imax[i] = ai[i+1] - ai[i]; 776 } 777 a->nz = ai[mbs]; 778 779 /* diagonals may have moved, so kill the diagonal pointers */ 780 if (fshift && a->diag) { 781 PetscFree(a->diag); 782 PLogObjectMemory(A,-(m+1)*sizeof(int)); 783 a->diag = 0; 784 } 785 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 786 m,a->n,a->bs,fshift*bs2,a->nz*bs2); 787 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 788 a->reallocs); 789 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 790 a->reallocs = 0; 791 A->info.nz_unneeded = (double)fshift*bs2; 792 793 return 0; 794 } 795 796 #undef __FUNC__ 797 #define __FUNC__ "MatZeroEntries_SeqBAIJ" 798 int MatZeroEntries_SeqBAIJ(Mat A) 799 { 800 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 801 PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar)); 802 return 0; 803 } 804 805 #undef __FUNC__ 806 #define __FUNC__ "MatDestroy_SeqBAIJ" /* ADIC Ignore */ 807 int MatDestroy_SeqBAIJ(PetscObject obj) 808 { 809 Mat A = (Mat) obj; 810 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 811 int ierr; 812 813 #if defined(PETSC_LOG) 814 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 815 #endif 816 PetscFree(a->a); 817 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 818 if (a->diag) PetscFree(a->diag); 819 if (a->ilen) PetscFree(a->ilen); 820 if (a->imax) PetscFree(a->imax); 821 if (a->solve_work) PetscFree(a->solve_work); 822 if (a->mult_work) PetscFree(a->mult_work); 823 PetscFree(a); 824 if (A->mapping) { 825 ierr = ISLocalToGlobalMappingDestroy(A->mapping); CHKERRQ(ierr); 826 } 827 PLogObjectDestroy(A); 828 PetscHeaderDestroy(A); 829 return 0; 830 } 831 832 #undef __FUNC__ 833 #define __FUNC__ "MatSetOption_SeqBAIJ" /* ADIC Ignore */ 834 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 835 { 836 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 837 if (op == MAT_ROW_ORIENTED) a->roworiented = 1; 838 else if (op == MAT_COLUMN_ORIENTED) a->roworiented = 0; 839 else if (op == MAT_COLUMNS_SORTED) a->sorted = 1; 840 else if (op == MAT_COLUMNS_UNSORTED) a->sorted = 0; 841 else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 842 else if (op == MAT_NEW_NONZERO_LOCATION_ERROR) a->nonew = -1; 843 else if (op == MAT_NEW_NONZERO_ALLOCATION_ERROR) a->nonew = -2; 844 else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 845 else if (op == MAT_ROWS_SORTED || 846 op == MAT_ROWS_UNSORTED || 847 op == MAT_SYMMETRIC || 848 op == MAT_STRUCTURALLY_SYMMETRIC || 849 op == MAT_YES_NEW_DIAGONALS || 850 op == MAT_IGNORE_OFF_PROC_ENTRIES) 851 PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 852 else if (op == MAT_NO_NEW_DIAGONALS) 853 {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");} 854 else 855 {SETERRQ(PETSC_ERR_SUP,0,"unknown option");} 856 return 0; 857 } 858 859 860 /* -------------------------------------------------------*/ 861 /* Should check that shapes of vectors and matrices match */ 862 /* -------------------------------------------------------*/ 863 #include "pinclude/plapack.h" 864 865 #undef __FUNC__ 866 #define __FUNC__ "MatMult_SeqBAIJ_1" 867 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz) 868 { 869 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 870 register Scalar *x,*z,*v,sum; 871 int mbs=a->mbs,i,*idx,*ii,n; 872 873 VecGetArray_Fast(xx,x); 874 VecGetArray_Fast(zz,z); 875 876 idx = a->j; 877 v = a->a; 878 ii = a->i; 879 880 for ( i=0; i<mbs; i++ ) { 881 n = ii[1] - ii[0]; ii++; 882 sum = 0.0; 883 while (n--) sum += *v++ * x[*idx++]; 884 z[i] = sum; 885 } 886 VecRestoreArray_Fast(xx,x); 887 VecRestoreArray_Fast(zz,z); 888 PLogFlops(2*a->nz - a->m); 889 return 0; 890 } 891 892 #undef __FUNC__ 893 #define __FUNC__ "MatMult_SeqBAIJ_2" 894 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz) 895 { 896 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 897 register Scalar *x,*z,*v,*xb,sum1,sum2; 898 register Scalar x1,x2; 899 int mbs=a->mbs,i,*idx,*ii; 900 int j,n; 901 902 VecGetArray_Fast(xx,x); 903 VecGetArray_Fast(zz,z); 904 905 idx = a->j; 906 v = a->a; 907 ii = a->i; 908 909 for ( i=0; i<mbs; i++ ) { 910 n = ii[1] - ii[0]; ii++; 911 sum1 = 0.0; sum2 = 0.0; 912 for ( j=0; j<n; j++ ) { 913 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 914 sum1 += v[0]*x1 + v[2]*x2; 915 sum2 += v[1]*x1 + v[3]*x2; 916 v += 4; 917 } 918 z[0] = sum1; z[1] = sum2; 919 z += 2; 920 } 921 VecRestoreArray_Fast(xx,x); 922 VecRestoreArray_Fast(zz,z); 923 PLogFlops(4*a->nz - a->m); 924 return 0; 925 } 926 927 #undef __FUNC__ 928 #define __FUNC__ "MatMult_SeqBAIJ_3" 929 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz) 930 { 931 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 932 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 933 int mbs=a->mbs,i,*idx,*ii,j,n; 934 935 VecGetArray_Fast(xx,x); 936 VecGetArray_Fast(zz,z); 937 938 idx = a->j; 939 v = a->a; 940 ii = a->i; 941 942 for ( i=0; i<mbs; i++ ) { 943 n = ii[1] - ii[0]; ii++; 944 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 945 for ( j=0; j<n; j++ ) { 946 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 947 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 948 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 949 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 950 v += 9; 951 } 952 z[0] = sum1; z[1] = sum2; z[2] = sum3; 953 z += 3; 954 } 955 VecRestoreArray_Fast(xx,x); 956 VecRestoreArray_Fast(zz,z); 957 PLogFlops(18*a->nz - a->m); 958 return 0; 959 } 960 961 #undef __FUNC__ 962 #define __FUNC__ "MatMult_SeqBAIJ_4" 963 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz) 964 { 965 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 966 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4; 967 register Scalar x1,x2,x3,x4; 968 int mbs=a->mbs,i,*idx,*ii; 969 int j,n; 970 971 VecGetArray_Fast(xx,x); 972 VecGetArray_Fast(zz,z); 973 974 idx = a->j; 975 v = a->a; 976 ii = a->i; 977 978 for ( i=0; i<mbs; i++ ) { 979 n = ii[1] - ii[0]; ii++; 980 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 981 for ( j=0; j<n; j++ ) { 982 xb = x + 4*(*idx++); 983 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 984 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 985 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 986 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 987 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 988 v += 16; 989 } 990 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 991 z += 4; 992 } 993 VecRestoreArray_Fast(xx,x); 994 VecRestoreArray_Fast(zz,z); 995 PLogFlops(32*a->nz - a->m); 996 return 0; 997 } 998 999 #undef __FUNC__ 1000 #define __FUNC__ "MatMult_SeqBAIJ_5" 1001 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz) 1002 { 1003 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1004 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 1005 register Scalar x1,x2,x3,x4,x5; 1006 int mbs=a->mbs,i,*idx,*ii,j,n; 1007 1008 VecGetArray_Fast(xx,x); 1009 VecGetArray_Fast(zz,z); 1010 1011 idx = a->j; 1012 v = a->a; 1013 ii = a->i; 1014 1015 for ( i=0; i<mbs; i++ ) { 1016 n = ii[1] - ii[0]; ii++; 1017 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 1018 for ( j=0; j<n; j++ ) { 1019 xb = x + 5*(*idx++); 1020 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 1021 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1022 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1023 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1024 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1025 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 1026 v += 25; 1027 } 1028 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 1029 z += 5; 1030 } 1031 VecRestoreArray_Fast(xx,x); 1032 VecRestoreArray_Fast(zz,z); 1033 PLogFlops(50*a->nz - a->m); 1034 return 0; 1035 } 1036 1037 #undef __FUNC__ 1038 #define __FUNC__ "MatMult_SeqBAIJ_7" 1039 static int MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz) 1040 { 1041 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1042 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 1043 register Scalar x1,x2,x3,x4,x5,x6,x7; 1044 int mbs=a->mbs,i,*idx,*ii,j,n; 1045 1046 VecGetArray_Fast(xx,x); 1047 VecGetArray_Fast(zz,z); 1048 1049 idx = a->j; 1050 v = a->a; 1051 ii = a->i; 1052 1053 for ( i=0; i<mbs; i++ ) { 1054 n = ii[1] - ii[0]; ii++; 1055 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0; 1056 for ( j=0; j<n; j++ ) { 1057 xb = x + 7*(*idx++); 1058 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 1059 sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 1060 sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 1061 sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 1062 sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 1063 sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 1064 sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 1065 sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 1066 v += 49; 1067 } 1068 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 1069 z += 7; 1070 } 1071 1072 VecRestoreArray_Fast(xx,x); 1073 VecRestoreArray_Fast(zz,z); 1074 PLogFlops(98*a->nz - a->m); 1075 return 0; 1076 } 1077 1078 #undef __FUNC__ 1079 #define __FUNC__ "MatMult_SeqBAIJ_N" 1080 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz) 1081 { 1082 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1083 register Scalar *x,*z,*v,*xb; 1084 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2; 1085 int _One = 1,ncols,k; 1086 Scalar _DOne = 1.0,*work,*workt,_DZero = 0.0; 1087 1088 VecGetArray_Fast(xx,x); 1089 VecGetArray_Fast(zz,z); 1090 1091 idx = a->j; 1092 v = a->a; 1093 ii = a->i; 1094 1095 1096 if (!a->mult_work) { 1097 k = PetscMax(a->m,a->n); 1098 a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work); 1099 } 1100 work = a->mult_work; 1101 for ( i=0; i<mbs; i++ ) { 1102 n = ii[1] - ii[0]; ii++; 1103 ncols = n*bs; 1104 workt = work; 1105 for ( j=0; j<n; j++ ) { 1106 xb = x + bs*(*idx++); 1107 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1108 workt += bs; 1109 } 1110 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); 1111 v += n*bs2; 1112 z += bs; 1113 } 1114 VecRestoreArray_Fast(xx,x); 1115 VecRestoreArray_Fast(zz,z); 1116 PLogFlops(2*a->nz*bs2 - a->m); 1117 return 0; 1118 } 1119 1120 #undef __FUNC__ 1121 #define __FUNC__ "MatMultAdd_SeqBAIJ_1" 1122 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 1123 { 1124 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1125 register Scalar *x,*y,*z,*v,sum; 1126 int mbs=a->mbs,i,*idx,*ii,n; 1127 1128 VecGetArray_Fast(xx,x); 1129 VecGetArray_Fast(yy,y); 1130 VecGetArray_Fast(zz,z); 1131 1132 idx = a->j; 1133 v = a->a; 1134 ii = a->i; 1135 1136 for ( i=0; i<mbs; i++ ) { 1137 n = ii[1] - ii[0]; ii++; 1138 sum = y[i]; 1139 while (n--) sum += *v++ * x[*idx++]; 1140 z[i] = sum; 1141 } 1142 VecRestoreArray_Fast(xx,x); 1143 VecRestoreArray_Fast(yy,y); 1144 VecRestoreArray_Fast(zz,z); 1145 PLogFlops(2*a->nz); 1146 return 0; 1147 } 1148 1149 #undef __FUNC__ 1150 #define __FUNC__ "MatMultAdd_SeqBAIJ_2" 1151 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 1152 { 1153 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1154 register Scalar *x,*y,*z,*v,*xb,sum1,sum2; 1155 register Scalar x1,x2; 1156 int mbs=a->mbs,i,*idx,*ii; 1157 int j,n; 1158 1159 VecGetArray_Fast(xx,x); 1160 VecGetArray_Fast(yy,y); 1161 VecGetArray_Fast(zz,z); 1162 1163 idx = a->j; 1164 v = a->a; 1165 ii = a->i; 1166 1167 for ( i=0; i<mbs; i++ ) { 1168 n = ii[1] - ii[0]; ii++; 1169 sum1 = y[0]; sum2 = y[1]; 1170 for ( j=0; j<n; j++ ) { 1171 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 1172 sum1 += v[0]*x1 + v[2]*x2; 1173 sum2 += v[1]*x1 + v[3]*x2; 1174 v += 4; 1175 } 1176 z[0] = sum1; z[1] = sum2; 1177 z += 2; y += 2; 1178 } 1179 VecRestoreArray_Fast(xx,x); 1180 VecRestoreArray_Fast(yy,y); 1181 VecRestoreArray_Fast(zz,z); 1182 PLogFlops(4*a->nz); 1183 return 0; 1184 } 1185 1186 #undef __FUNC__ 1187 #define __FUNC__ "MatMultAdd_SeqBAIJ_3" 1188 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 1189 { 1190 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1191 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 1192 int mbs=a->mbs,i,*idx,*ii,j,n; 1193 1194 VecGetArray_Fast(xx,x); 1195 VecGetArray_Fast(yy,y); 1196 VecGetArray_Fast(zz,z); 1197 1198 idx = a->j; 1199 v = a->a; 1200 ii = a->i; 1201 1202 for ( i=0; i<mbs; i++ ) { 1203 n = ii[1] - ii[0]; ii++; 1204 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; 1205 for ( j=0; j<n; j++ ) { 1206 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1207 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 1208 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 1209 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 1210 v += 9; 1211 } 1212 z[0] = sum1; z[1] = sum2; z[2] = sum3; 1213 z += 3; y += 3; 1214 } 1215 VecRestoreArray_Fast(xx,x); 1216 VecRestoreArray_Fast(yy,y); 1217 VecRestoreArray_Fast(zz,z); 1218 PLogFlops(18*a->nz); 1219 return 0; 1220 } 1221 1222 #undef __FUNC__ 1223 #define __FUNC__ "MatMultAdd_SeqBAIJ_4" 1224 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 1225 { 1226 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1227 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4; 1228 register Scalar x1,x2,x3,x4; 1229 int mbs=a->mbs,i,*idx,*ii; 1230 int j,n; 1231 1232 VecGetArray_Fast(xx,x); 1233 VecGetArray_Fast(yy,y); 1234 VecGetArray_Fast(zz,z); 1235 1236 idx = a->j; 1237 v = a->a; 1238 ii = a->i; 1239 1240 for ( i=0; i<mbs; i++ ) { 1241 n = ii[1] - ii[0]; ii++; 1242 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; 1243 for ( j=0; j<n; j++ ) { 1244 xb = x + 4*(*idx++); 1245 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1246 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 1247 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 1248 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 1249 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 1250 v += 16; 1251 } 1252 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 1253 z += 4; y += 4; 1254 } 1255 VecRestoreArray_Fast(xx,x); 1256 VecRestoreArray_Fast(yy,y); 1257 VecRestoreArray_Fast(zz,z); 1258 PLogFlops(32*a->nz); 1259 return 0; 1260 } 1261 1262 #undef __FUNC__ 1263 #define __FUNC__ "MatMultAdd_SeqBAIJ_5" 1264 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 1265 { 1266 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1267 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 1268 register Scalar x1,x2,x3,x4,x5; 1269 int mbs=a->mbs,i,*idx,*ii,j,n; 1270 1271 VecGetArray_Fast(xx,x); 1272 VecGetArray_Fast(yy,y); 1273 VecGetArray_Fast(zz,z); 1274 1275 idx = a->j; 1276 v = a->a; 1277 ii = a->i; 1278 1279 for ( i=0; i<mbs; i++ ) { 1280 n = ii[1] - ii[0]; ii++; 1281 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; 1282 for ( j=0; j<n; j++ ) { 1283 xb = x + 5*(*idx++); 1284 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 1285 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1286 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1287 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1288 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1289 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 1290 v += 25; 1291 } 1292 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 1293 z += 5; y += 5; 1294 } 1295 VecRestoreArray_Fast(xx,x); 1296 VecRestoreArray_Fast(yy,y); 1297 VecRestoreArray_Fast(zz,z); 1298 PLogFlops(50*a->nz); 1299 return 0; 1300 } 1301 1302 #undef __FUNC__ 1303 #define __FUNC__ "MatMultAdd_SeqBAIJ_7" 1304 static int MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1305 { 1306 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1307 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 1308 register Scalar x1,x2,x3,x4,x5,x6,x7; 1309 int mbs=a->mbs,i,*idx,*ii,j,n; 1310 1311 VecGetArray_Fast(xx,x); 1312 VecGetArray_Fast(yy,y); 1313 VecGetArray_Fast(zz,z); 1314 1315 idx = a->j; 1316 v = a->a; 1317 ii = a->i; 1318 1319 for ( i=0; i<mbs; i++ ) { 1320 n = ii[1] - ii[0]; ii++; 1321 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6]; 1322 for ( j=0; j<n; j++ ) { 1323 xb = x + 7*(*idx++); 1324 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 1325 sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 1326 sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 1327 sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 1328 sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 1329 sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 1330 sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 1331 sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 1332 v += 49; 1333 } 1334 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 1335 z += 7; y += 7; 1336 } 1337 VecRestoreArray_Fast(xx,x); 1338 VecRestoreArray_Fast(yy,y); 1339 VecRestoreArray_Fast(zz,z); 1340 PLogFlops(98*a->nz); 1341 return 0; 1342 } 1343 1344 1345 #undef __FUNC__ 1346 #define __FUNC__ "MatMultAdd_SeqBAIJ_N" 1347 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 1348 { 1349 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1350 register Scalar *x,*z,*v,*xb; 1351 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr; 1352 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1353 1354 if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1355 1356 VecGetArray_Fast(xx,x); 1357 VecGetArray_Fast(zz,z); 1358 1359 idx = a->j; 1360 v = a->a; 1361 ii = a->i; 1362 1363 1364 if (!a->mult_work) { 1365 k = PetscMax(a->m,a->n); 1366 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work); 1367 } 1368 work = a->mult_work; 1369 for ( i=0; i<mbs; i++ ) { 1370 n = ii[1] - ii[0]; ii++; 1371 ncols = n*bs; 1372 workt = work; 1373 for ( j=0; j<n; j++ ) { 1374 xb = x + bs*(*idx++); 1375 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1376 workt += bs; 1377 } 1378 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); 1379 v += n*bs2; 1380 z += bs; 1381 } 1382 VecRestoreArray_Fast(xx,x); 1383 VecRestoreArray_Fast(zz,z); 1384 PLogFlops(2*a->nz*bs2 ); 1385 return 0; 1386 } 1387 1388 #undef __FUNC__ 1389 #define __FUNC__ "MatMultTrans_SeqBAIJ" 1390 int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz) 1391 { 1392 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1393 Scalar *xg,*zg,*zb; 1394 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1395 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1396 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1397 1398 1399 VecGetArray_Fast(xx,xg); x = xg; 1400 VecGetArray_Fast(zz,zg); z = zg; 1401 PetscMemzero(z,N*sizeof(Scalar)); 1402 1403 idx = a->j; 1404 v = a->a; 1405 ii = a->i; 1406 1407 switch (bs) { 1408 case 1: 1409 for ( i=0; i<mbs; i++ ) { 1410 n = ii[1] - ii[0]; ii++; 1411 xb = x + i; x1 = xb[0]; 1412 ib = idx + ai[i]; 1413 for ( j=0; j<n; j++ ) { 1414 rval = ib[j]; 1415 z[rval] += *v++ * x1; 1416 } 1417 } 1418 break; 1419 case 2: 1420 for ( i=0; i<mbs; i++ ) { 1421 n = ii[1] - ii[0]; ii++; 1422 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1423 ib = idx + ai[i]; 1424 for ( j=0; j<n; j++ ) { 1425 rval = ib[j]*2; 1426 z[rval++] += v[0]*x1 + v[1]*x2; 1427 z[rval++] += v[2]*x1 + v[3]*x2; 1428 v += 4; 1429 } 1430 } 1431 break; 1432 case 3: 1433 for ( i=0; i<mbs; i++ ) { 1434 n = ii[1] - ii[0]; ii++; 1435 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1436 ib = idx + ai[i]; 1437 for ( j=0; j<n; j++ ) { 1438 rval = ib[j]*3; 1439 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1440 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1441 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1442 v += 9; 1443 } 1444 } 1445 break; 1446 case 4: 1447 for ( i=0; i<mbs; i++ ) { 1448 n = ii[1] - ii[0]; ii++; 1449 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1450 ib = idx + ai[i]; 1451 for ( j=0; j<n; j++ ) { 1452 rval = ib[j]*4; 1453 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1454 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1455 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1456 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1457 v += 16; 1458 } 1459 } 1460 break; 1461 case 5: 1462 for ( i=0; i<mbs; i++ ) { 1463 n = ii[1] - ii[0]; ii++; 1464 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1465 x4 = xb[3]; x5 = xb[4]; 1466 ib = idx + ai[i]; 1467 for ( j=0; j<n; j++ ) { 1468 rval = ib[j]*5; 1469 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1470 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1471 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1472 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1473 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1474 v += 25; 1475 } 1476 } 1477 break; 1478 /* block sizes larger then 5 by 5 are handled by BLAS */ 1479 default: { 1480 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1481 if (!a->mult_work) { 1482 k = PetscMax(a->m,a->n); 1483 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1484 CHKPTRQ(a->mult_work); 1485 } 1486 work = a->mult_work; 1487 for ( i=0; i<mbs; i++ ) { 1488 n = ii[1] - ii[0]; ii++; 1489 ncols = n*bs; 1490 PetscMemzero(work,ncols*sizeof(Scalar)); 1491 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1492 v += n*bs2; 1493 x += bs; 1494 workt = work; 1495 for ( j=0; j<n; j++ ) { 1496 zb = z + bs*(*idx++); 1497 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1498 workt += bs; 1499 } 1500 } 1501 } 1502 } 1503 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1504 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1505 PLogFlops(2*a->nz*a->bs2 - a->n); 1506 return 0; 1507 } 1508 1509 #undef __FUNC__ 1510 #define __FUNC__ "MatMultTransAdd_SeqBAIJ" 1511 int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1512 { 1513 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1514 Scalar *xg,*zg,*zb; 1515 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1516 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1517 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1518 1519 1520 1521 VecGetArray_Fast(xx,xg); x = xg; 1522 VecGetArray_Fast(zz,zg); z = zg; 1523 1524 if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1525 else PetscMemzero(z,N*sizeof(Scalar)); 1526 1527 1528 idx = a->j; 1529 v = a->a; 1530 ii = a->i; 1531 1532 switch (bs) { 1533 case 1: 1534 for ( i=0; i<mbs; i++ ) { 1535 n = ii[1] - ii[0]; ii++; 1536 xb = x + i; x1 = xb[0]; 1537 ib = idx + ai[i]; 1538 for ( j=0; j<n; j++ ) { 1539 rval = ib[j]; 1540 z[rval] += *v++ * x1; 1541 } 1542 } 1543 break; 1544 case 2: 1545 for ( i=0; i<mbs; i++ ) { 1546 n = ii[1] - ii[0]; ii++; 1547 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1548 ib = idx + ai[i]; 1549 for ( j=0; j<n; j++ ) { 1550 rval = ib[j]*2; 1551 z[rval++] += v[0]*x1 + v[1]*x2; 1552 z[rval++] += v[2]*x1 + v[3]*x2; 1553 v += 4; 1554 } 1555 } 1556 break; 1557 case 3: 1558 for ( i=0; i<mbs; i++ ) { 1559 n = ii[1] - ii[0]; ii++; 1560 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1561 ib = idx + ai[i]; 1562 for ( j=0; j<n; j++ ) { 1563 rval = ib[j]*3; 1564 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1565 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1566 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1567 v += 9; 1568 } 1569 } 1570 break; 1571 case 4: 1572 for ( i=0; i<mbs; i++ ) { 1573 n = ii[1] - ii[0]; ii++; 1574 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1575 ib = idx + ai[i]; 1576 for ( j=0; j<n; j++ ) { 1577 rval = ib[j]*4; 1578 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1579 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1580 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1581 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1582 v += 16; 1583 } 1584 } 1585 break; 1586 case 5: 1587 for ( i=0; i<mbs; i++ ) { 1588 n = ii[1] - ii[0]; ii++; 1589 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1590 x4 = xb[3]; x5 = xb[4]; 1591 ib = idx + ai[i]; 1592 for ( j=0; j<n; j++ ) { 1593 rval = ib[j]*5; 1594 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1595 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1596 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1597 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1598 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1599 v += 25; 1600 } 1601 } 1602 break; 1603 /* block sizes larger then 5 by 5 are handled by BLAS */ 1604 default: { 1605 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1606 if (!a->mult_work) { 1607 k = PetscMax(a->m,a->n); 1608 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1609 CHKPTRQ(a->mult_work); 1610 } 1611 work = a->mult_work; 1612 for ( i=0; i<mbs; i++ ) { 1613 n = ii[1] - ii[0]; ii++; 1614 ncols = n*bs; 1615 PetscMemzero(work,ncols*sizeof(Scalar)); 1616 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1617 v += n*bs2; 1618 x += bs; 1619 workt = work; 1620 for ( j=0; j<n; j++ ) { 1621 zb = z + bs*(*idx++); 1622 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1623 workt += bs; 1624 } 1625 } 1626 } 1627 } 1628 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1629 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1630 PLogFlops(2*a->nz*a->bs2); 1631 return 0; 1632 } 1633 1634 #undef __FUNC__ 1635 #define __FUNC__ "MatGetInfo_SeqBAIJ" /* ADIC Ignore */ 1636 int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info) 1637 { 1638 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1639 1640 info->rows_global = (double)a->m; 1641 info->columns_global = (double)a->n; 1642 info->rows_local = (double)a->m; 1643 info->columns_local = (double)a->n; 1644 info->block_size = a->bs2; 1645 info->nz_allocated = a->maxnz; 1646 info->nz_used = a->bs2*a->nz; 1647 info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 1648 /* if (info->nz_unneeded != A->info.nz_unneeded) 1649 printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */ 1650 info->assemblies = A->num_ass; 1651 info->mallocs = a->reallocs; 1652 info->memory = A->mem; 1653 if (A->factor) { 1654 info->fill_ratio_given = A->info.fill_ratio_given; 1655 info->fill_ratio_needed = A->info.fill_ratio_needed; 1656 info->factor_mallocs = A->info.factor_mallocs; 1657 } else { 1658 info->fill_ratio_given = 0; 1659 info->fill_ratio_needed = 0; 1660 info->factor_mallocs = 0; 1661 } 1662 return 0; 1663 } 1664 1665 #undef __FUNC__ 1666 #define __FUNC__ "MatEqual_SeqBAIJ" 1667 int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 1668 { 1669 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 1670 1671 if (B->type !=MATSEQBAIJ)SETERRQ(1,0,"Matrices must be same type"); 1672 1673 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1674 if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 1675 (a->nz != b->nz)) { 1676 *flg = PETSC_FALSE; return 0; 1677 } 1678 1679 /* if the a->i are the same */ 1680 if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 1681 *flg = PETSC_FALSE; return 0; 1682 } 1683 1684 /* if a->j are the same */ 1685 if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 1686 *flg = PETSC_FALSE; return 0; 1687 } 1688 1689 /* if a->a are the same */ 1690 if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 1691 *flg = PETSC_FALSE; return 0; 1692 } 1693 *flg = PETSC_TRUE; 1694 return 0; 1695 1696 } 1697 1698 #undef __FUNC__ 1699 #define __FUNC__ "MatGetDiagonal_SeqBAIJ" 1700 int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 1701 { 1702 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1703 int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 1704 Scalar *x, zero = 0.0,*aa,*aa_j; 1705 1706 if (A->factor) SETERRQ(1,0,"Not for factored matrix"); 1707 bs = a->bs; 1708 aa = a->a; 1709 ai = a->i; 1710 aj = a->j; 1711 ambs = a->mbs; 1712 bs2 = a->bs2; 1713 1714 VecSet(&zero,v); 1715 VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n); 1716 if (n != a->m) SETERRQ(1,0,"Nonconforming matrix and vector"); 1717 for ( i=0; i<ambs; i++ ) { 1718 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1719 if (aj[j] == i) { 1720 row = i*bs; 1721 aa_j = aa+j*bs2; 1722 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 1723 break; 1724 } 1725 } 1726 } 1727 return 0; 1728 } 1729 1730 #undef __FUNC__ 1731 #define __FUNC__ "MatDiagonalScale_SeqBAIJ" 1732 int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 1733 { 1734 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1735 Scalar *l,*r,x,*v,*aa,*li,*ri; 1736 int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 1737 1738 ai = a->i; 1739 aj = a->j; 1740 aa = a->a; 1741 m = a->m; 1742 n = a->n; 1743 bs = a->bs; 1744 mbs = a->mbs; 1745 bs2 = a->bs2; 1746 if (ll) { 1747 VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm); 1748 if (lm != m) SETERRQ(1,0,"Left scaling vector wrong length"); 1749 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1750 M = ai[i+1] - ai[i]; 1751 li = l + i*bs; 1752 v = aa + bs2*ai[i]; 1753 for ( j=0; j<M; j++ ) { /* for each block */ 1754 for ( k=0; k<bs2; k++ ) { 1755 (*v++) *= li[k%bs]; 1756 } 1757 } 1758 } 1759 } 1760 1761 if (rr) { 1762 VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn); 1763 if (rn != n) SETERRQ(1,0,"Right scaling vector wrong length"); 1764 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1765 M = ai[i+1] - ai[i]; 1766 v = aa + bs2*ai[i]; 1767 for ( j=0; j<M; j++ ) { /* for each block */ 1768 ri = r + bs*aj[ai[i]+j]; 1769 for ( k=0; k<bs; k++ ) { 1770 x = ri[k]; 1771 for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 1772 } 1773 } 1774 } 1775 } 1776 return 0; 1777 } 1778 1779 1780 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 1781 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 1782 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1783 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*); 1784 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**); 1785 1786 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 1787 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 1788 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 1789 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 1790 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 1791 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 1792 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec); 1793 1794 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 1795 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 1796 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 1797 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 1798 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 1799 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 1800 1801 #undef __FUNC__ 1802 #define __FUNC__ "MatNorm_SeqBAIJ" 1803 int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 1804 { 1805 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1806 Scalar *v = a->a; 1807 double sum = 0.0; 1808 int i,nz=a->nz,bs2=a->bs2; 1809 1810 if (type == NORM_FROBENIUS) { 1811 for (i=0; i< bs2*nz; i++ ) { 1812 #if defined(PETSC_COMPLEX) 1813 sum += real(conj(*v)*(*v)); v++; 1814 #else 1815 sum += (*v)*(*v); v++; 1816 #endif 1817 } 1818 *norm = sqrt(sum); 1819 } 1820 else { 1821 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 1822 } 1823 return 0; 1824 } 1825 1826 /* 1827 note: This can only work for identity for row and col. It would 1828 be good to check this and otherwise generate an error. 1829 */ 1830 #undef __FUNC__ 1831 #define __FUNC__ "MatILUFactor_SeqBAIJ" 1832 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 1833 { 1834 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1835 Mat outA; 1836 int ierr; 1837 1838 if (fill != 0) SETERRQ(1,0,"Only fill=0 supported"); 1839 1840 outA = inA; 1841 inA->factor = FACTOR_LU; 1842 a->row = row; 1843 a->col = col; 1844 1845 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1846 1847 if (!a->diag) { 1848 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 1849 } 1850 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 1851 return 0; 1852 } 1853 1854 #undef __FUNC__ 1855 #define __FUNC__ "MatScale_SeqBAIJ" 1856 int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 1857 { 1858 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1859 int one = 1, totalnz = a->bs2*a->nz; 1860 BLscal_( &totalnz, alpha, a->a, &one ); 1861 PLogFlops(totalnz); 1862 return 0; 1863 } 1864 1865 #undef __FUNC__ 1866 #define __FUNC__ "MatGetValues_SeqBAIJ" 1867 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 1868 { 1869 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1870 int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 1871 int *ai = a->i, *ailen = a->ilen; 1872 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 1873 Scalar *ap, *aa = a->a, zero = 0.0; 1874 1875 for ( k=0; k<m; k++ ) { /* loop over rows */ 1876 row = im[k]; brow = row/bs; 1877 if (row < 0) SETERRQ(1,0,"Negative row"); 1878 if (row >= a->m) SETERRQ(1,0,"Row too large"); 1879 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1880 nrow = ailen[brow]; 1881 for ( l=0; l<n; l++ ) { /* loop over columns */ 1882 if (in[l] < 0) SETERRQ(1,0,"Negative column"); 1883 if (in[l] >= a->n) SETERRQ(1,0,"Column too large"); 1884 col = in[l] ; 1885 bcol = col/bs; 1886 cidx = col%bs; 1887 ridx = row%bs; 1888 high = nrow; 1889 low = 0; /* assume unsorted */ 1890 while (high-low > 5) { 1891 t = (low+high)/2; 1892 if (rp[t] > bcol) high = t; 1893 else low = t; 1894 } 1895 for ( i=low; i<high; i++ ) { 1896 if (rp[i] > bcol) break; 1897 if (rp[i] == bcol) { 1898 *v++ = ap[bs2*i+bs*cidx+ridx]; 1899 goto finished; 1900 } 1901 } 1902 *v++ = zero; 1903 finished:; 1904 } 1905 } 1906 return 0; 1907 } 1908 1909 #undef __FUNC__ 1910 #define __FUNC__ "MatGetBlockSize_SeqBAIJ" /* ADIC Ignore */ 1911 int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs) 1912 { 1913 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data; 1914 *bs = baij->bs; 1915 return 0; 1916 } 1917 1918 /* idx should be of length atlease bs */ 1919 #undef __FUNC__ 1920 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Block" 1921 static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg) 1922 { 1923 int i,row; 1924 row = idx[0]; 1925 if (row%bs!=0) { *flg = PETSC_FALSE; return 0; } 1926 1927 for ( i=1; i<bs; i++ ) { 1928 if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; } 1929 } 1930 *flg = PETSC_TRUE; 1931 return 0; 1932 } 1933 1934 #undef __FUNC__ 1935 #define __FUNC__ "MatZeroRows_SeqBAIJ" 1936 int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag) 1937 { 1938 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1939 IS is_local; 1940 int ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2; 1941 PetscTruth flg; 1942 Scalar zero = 0.0,*aa; 1943 1944 /* Make a copy of the IS and sort it */ 1945 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 1946 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1947 ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr); 1948 ierr = ISSort(is_local); CHKERRQ(ierr); 1949 ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr); 1950 1951 i = 0; 1952 while (i < is_n) { 1953 if (rows[i]<0 || rows[i]>m) SETERRQ(1,0,"row out of range"); 1954 flg = PETSC_FALSE; 1955 if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); } 1956 if (flg) { /* There exists a block of rows to be Zerowed */ 1957 baij->ilen[rows[i]/bs] = 0; 1958 i += bs; 1959 } else { /* Zero out only the requested row */ 1960 count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs; 1961 aa = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs); 1962 for ( j=0; j<count; j++ ) { 1963 aa[0] = zero; 1964 aa+=bs; 1965 } 1966 i++; 1967 } 1968 } 1969 if (diag) { 1970 for ( j=0; j<is_n; j++ ) { 1971 ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1972 } 1973 } 1974 ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr); 1975 ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr); 1976 ierr = ISDestroy(is_local); CHKERRQ(ierr); 1977 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1978 1979 return 0; 1980 } 1981 1982 #undef __FUNC__ 1983 #define __FUNC__ "MatPrintHelp_SeqBAIJ" /* ADIC Ignore */ 1984 int MatPrintHelp_SeqBAIJ(Mat A) 1985 { 1986 static int called = 0; 1987 MPI_Comm comm = A->comm; 1988 1989 if (called) return 0; else called = 1; 1990 PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n"); 1991 PetscPrintf(comm," -mat_block_size <block_size>\n"); 1992 return 0; 1993 } 1994 1995 /* -------------------------------------------------------------------*/ 1996 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 1997 MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 1998 MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N, 1999 MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ, 2000 MatSolve_SeqBAIJ_N,0, 2001 0,0, 2002 MatLUFactor_SeqBAIJ,0, 2003 0, 2004 MatTranspose_SeqBAIJ, 2005 MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 2006 MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 2007 0,MatAssemblyEnd_SeqBAIJ, 2008 0, 2009 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ, 2010 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 2011 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 2012 MatILUFactorSymbolic_SeqBAIJ,0, 2013 0,0, 2014 MatConvertSameType_SeqBAIJ,0,0, 2015 MatILUFactor_SeqBAIJ,0,0, 2016 MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ, 2017 MatGetValues_SeqBAIJ,0, 2018 MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 2019 0,0,0,MatGetBlockSize_SeqBAIJ, 2020 MatGetRowIJ_SeqBAIJ, 2021 MatRestoreRowIJ_SeqBAIJ, 2022 0,0,0,0,0,0, 2023 MatSetValuesBlocked_SeqBAIJ}; 2024 2025 #undef __FUNC__ 2026 #define __FUNC__ "MatCreateSeqBAIJ" 2027 /*@C 2028 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2029 compressed row) format. For good matrix assembly performance the 2030 user should preallocate the matrix storage by setting the parameter nz 2031 (or the array nzz). By setting these parameters accurately, performance 2032 during matrix assembly can be increased by more than a factor of 50. 2033 2034 Input Parameters: 2035 . comm - MPI communicator, set to PETSC_COMM_SELF 2036 . bs - size of block 2037 . m - number of rows 2038 . n - number of columns 2039 . nz - number of block nonzeros per block row (same for all rows) 2040 . nzz - array containing the number of block nonzeros in the various block rows 2041 (possibly different for each block row) or PETSC_NULL 2042 2043 Output Parameter: 2044 . A - the matrix 2045 2046 Options Database Keys: 2047 $ -mat_no_unroll - uses code that does not unroll the loops in the 2048 $ block calculations (much solver) 2049 $ -mat_block_size - size of the blocks to use 2050 2051 Notes: 2052 The block AIJ format is fully compatible with standard Fortran 77 2053 storage. That is, the stored row and column indices can begin at 2054 either one (as in Fortran) or zero. See the users' manual for details. 2055 2056 Specify the preallocated storage with either nz or nnz (not both). 2057 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2058 allocation. For additional details, see the users manual chapter on 2059 matrices. 2060 2061 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2062 @*/ 2063 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 2064 { 2065 Mat B; 2066 Mat_SeqBAIJ *b; 2067 int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size; 2068 2069 MPI_Comm_size(comm,&size); 2070 if (size > 1) SETERRQ(1,0,"Comm must be of size 1"); 2071 2072 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr); 2073 2074 if (mbs*bs!=m || nbs*bs!=n) 2075 SETERRQ(1,0,"Number rows, cols must be divisible by blocksize"); 2076 2077 *A = 0; 2078 PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 2079 PLogObjectCreate(B); 2080 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 2081 PetscMemzero(b,sizeof(Mat_SeqBAIJ)); 2082 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 2083 ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr); 2084 if (!flg) { 2085 switch (bs) { 2086 case 1: 2087 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2088 B->ops.solve = MatSolve_SeqBAIJ_1; 2089 B->ops.mult = MatMult_SeqBAIJ_1; 2090 B->ops.multadd = MatMultAdd_SeqBAIJ_1; 2091 break; 2092 case 2: 2093 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2094 B->ops.solve = MatSolve_SeqBAIJ_2; 2095 B->ops.mult = MatMult_SeqBAIJ_2; 2096 B->ops.multadd = MatMultAdd_SeqBAIJ_2; 2097 break; 2098 case 3: 2099 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2100 B->ops.solve = MatSolve_SeqBAIJ_3; 2101 B->ops.mult = MatMult_SeqBAIJ_3; 2102 B->ops.multadd = MatMultAdd_SeqBAIJ_3; 2103 break; 2104 case 4: 2105 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2106 B->ops.solve = MatSolve_SeqBAIJ_4; 2107 B->ops.mult = MatMult_SeqBAIJ_4; 2108 B->ops.multadd = MatMultAdd_SeqBAIJ_4; 2109 break; 2110 case 5: 2111 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2112 B->ops.solve = MatSolve_SeqBAIJ_5; 2113 B->ops.mult = MatMult_SeqBAIJ_5; 2114 B->ops.multadd = MatMultAdd_SeqBAIJ_5; 2115 break; 2116 case 7: 2117 B->ops.mult = MatMult_SeqBAIJ_7; 2118 B->ops.solve = MatSolve_SeqBAIJ_7; 2119 B->ops.multadd = MatMultAdd_SeqBAIJ_7; 2120 break; 2121 } 2122 } 2123 B->destroy = MatDestroy_SeqBAIJ; 2124 B->view = MatView_SeqBAIJ; 2125 B->factor = 0; 2126 B->lupivotthreshold = 1.0; 2127 B->mapping = 0; 2128 b->row = 0; 2129 b->col = 0; 2130 b->reallocs = 0; 2131 2132 b->m = m; B->m = m; B->M = m; 2133 b->n = n; B->n = n; B->N = n; 2134 b->mbs = mbs; 2135 b->nbs = nbs; 2136 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 2137 if (nnz == PETSC_NULL) { 2138 if (nz == PETSC_DEFAULT) nz = 5; 2139 else if (nz <= 0) nz = 1; 2140 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 2141 nz = nz*mbs; 2142 } 2143 else { 2144 nz = 0; 2145 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 2146 } 2147 2148 /* allocate the matrix space */ 2149 len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 2150 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 2151 PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 2152 b->j = (int *) (b->a + nz*bs2); 2153 PetscMemzero(b->j,nz*sizeof(int)); 2154 b->i = b->j + nz; 2155 b->singlemalloc = 1; 2156 2157 b->i[0] = 0; 2158 for (i=1; i<mbs+1; i++) { 2159 b->i[i] = b->i[i-1] + b->imax[i-1]; 2160 } 2161 2162 /* b->ilen will count nonzeros in each block row so far. */ 2163 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 2164 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2165 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 2166 2167 b->bs = bs; 2168 b->bs2 = bs2; 2169 b->mbs = mbs; 2170 b->nz = 0; 2171 b->maxnz = nz*bs2; 2172 b->sorted = 0; 2173 b->roworiented = 1; 2174 b->nonew = 0; 2175 b->diag = 0; 2176 b->solve_work = 0; 2177 b->mult_work = 0; 2178 b->spptr = 0; 2179 B->info.nz_unneeded = (double)b->maxnz; 2180 2181 *A = B; 2182 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 2183 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 2184 return 0; 2185 } 2186 2187 #undef __FUNC__ 2188 #define __FUNC__ "MatConvertSameType_SeqBAIJ" 2189 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 2190 { 2191 Mat C; 2192 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 2193 int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 2194 2195 if (a->i[mbs] != nz) SETERRQ(1,0,"Corrupt matrix"); 2196 2197 *B = 0; 2198 PetscHeaderCreate(C,_p_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 2199 PLogObjectCreate(C); 2200 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 2201 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 2202 C->destroy = MatDestroy_SeqBAIJ; 2203 C->view = MatView_SeqBAIJ; 2204 C->factor = A->factor; 2205 c->row = 0; 2206 c->col = 0; 2207 C->assembled = PETSC_TRUE; 2208 2209 c->m = C->m = a->m; 2210 c->n = C->n = a->n; 2211 C->M = a->m; 2212 C->N = a->n; 2213 2214 c->bs = a->bs; 2215 c->bs2 = a->bs2; 2216 c->mbs = a->mbs; 2217 c->nbs = a->nbs; 2218 2219 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 2220 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 2221 for ( i=0; i<mbs; i++ ) { 2222 c->imax[i] = a->imax[i]; 2223 c->ilen[i] = a->ilen[i]; 2224 } 2225 2226 /* allocate the matrix space */ 2227 c->singlemalloc = 1; 2228 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 2229 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 2230 c->j = (int *) (c->a + nz*bs2); 2231 c->i = c->j + nz; 2232 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 2233 if (mbs > 0) { 2234 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 2235 if (cpvalues == COPY_VALUES) { 2236 PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 2237 } 2238 } 2239 2240 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2241 c->sorted = a->sorted; 2242 c->roworiented = a->roworiented; 2243 c->nonew = a->nonew; 2244 2245 if (a->diag) { 2246 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 2247 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 2248 for ( i=0; i<mbs; i++ ) { 2249 c->diag[i] = a->diag[i]; 2250 } 2251 } 2252 else c->diag = 0; 2253 c->nz = a->nz; 2254 c->maxnz = a->maxnz; 2255 c->solve_work = 0; 2256 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 2257 c->mult_work = 0; 2258 *B = C; 2259 return 0; 2260 } 2261 2262 #undef __FUNC__ 2263 #define __FUNC__ "MatLoad_SeqBAIJ" 2264 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 2265 { 2266 Mat_SeqBAIJ *a; 2267 Mat B; 2268 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 2269 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2270 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 2271 int *masked, nmask,tmp,bs2,ishift; 2272 Scalar *aa; 2273 MPI_Comm comm = ((PetscObject) viewer)->comm; 2274 2275 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2276 bs2 = bs*bs; 2277 2278 MPI_Comm_size(comm,&size); 2279 if (size > 1) SETERRQ(1,0,"view must have one processor"); 2280 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2281 ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 2282 if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not Mat object"); 2283 M = header[1]; N = header[2]; nz = header[3]; 2284 2285 if (M != N) SETERRQ(1,0,"Can only do square matrices"); 2286 2287 /* 2288 This code adds extra rows to make sure the number of rows is 2289 divisible by the blocksize 2290 */ 2291 mbs = M/bs; 2292 extra_rows = bs - M + bs*(mbs); 2293 if (extra_rows == bs) extra_rows = 0; 2294 else mbs++; 2295 if (extra_rows) { 2296 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 2297 } 2298 2299 /* read in row lengths */ 2300 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 2301 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 2302 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 2303 2304 /* read in column indices */ 2305 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 2306 ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 2307 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 2308 2309 /* loop over row lengths determining block row lengths */ 2310 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 2311 PetscMemzero(browlengths,mbs*sizeof(int)); 2312 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 2313 PetscMemzero(mask,mbs*sizeof(int)); 2314 masked = mask + mbs; 2315 rowcount = 0; nzcount = 0; 2316 for ( i=0; i<mbs; i++ ) { 2317 nmask = 0; 2318 for ( j=0; j<bs; j++ ) { 2319 kmax = rowlengths[rowcount]; 2320 for ( k=0; k<kmax; k++ ) { 2321 tmp = jj[nzcount++]/bs; 2322 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2323 } 2324 rowcount++; 2325 } 2326 browlengths[i] += nmask; 2327 /* zero out the mask elements we set */ 2328 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2329 } 2330 2331 /* create our matrix */ 2332 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 2333 CHKERRQ(ierr); 2334 B = *A; 2335 a = (Mat_SeqBAIJ *) B->data; 2336 2337 /* set matrix "i" values */ 2338 a->i[0] = 0; 2339 for ( i=1; i<= mbs; i++ ) { 2340 a->i[i] = a->i[i-1] + browlengths[i-1]; 2341 a->ilen[i-1] = browlengths[i-1]; 2342 } 2343 a->nz = 0; 2344 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 2345 2346 /* read in nonzero values */ 2347 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 2348 ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 2349 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 2350 2351 /* set "a" and "j" values into matrix */ 2352 nzcount = 0; jcount = 0; 2353 for ( i=0; i<mbs; i++ ) { 2354 nzcountb = nzcount; 2355 nmask = 0; 2356 for ( j=0; j<bs; j++ ) { 2357 kmax = rowlengths[i*bs+j]; 2358 for ( k=0; k<kmax; k++ ) { 2359 tmp = jj[nzcount++]/bs; 2360 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2361 } 2362 rowcount++; 2363 } 2364 /* sort the masked values */ 2365 PetscSortInt(nmask,masked); 2366 2367 /* set "j" values into matrix */ 2368 maskcount = 1; 2369 for ( j=0; j<nmask; j++ ) { 2370 a->j[jcount++] = masked[j]; 2371 mask[masked[j]] = maskcount++; 2372 } 2373 /* set "a" values into matrix */ 2374 ishift = bs2*a->i[i]; 2375 for ( j=0; j<bs; j++ ) { 2376 kmax = rowlengths[i*bs+j]; 2377 for ( k=0; k<kmax; k++ ) { 2378 tmp = jj[nzcountb]/bs ; 2379 block = mask[tmp] - 1; 2380 point = jj[nzcountb] - bs*tmp; 2381 idx = ishift + bs2*block + j + bs*point; 2382 a->a[idx] = aa[nzcountb++]; 2383 } 2384 } 2385 /* zero out the mask elements we set */ 2386 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2387 } 2388 if (jcount != a->nz) SETERRQ(1,0,"Error bad binary matrix"); 2389 2390 PetscFree(rowlengths); 2391 PetscFree(browlengths); 2392 PetscFree(aa); 2393 PetscFree(jj); 2394 PetscFree(mask); 2395 2396 B->assembled = PETSC_TRUE; 2397 2398 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 2399 if (flg) { 2400 Viewer tviewer; 2401 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2402 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO,0);CHKERRQ(ierr); 2403 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2404 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2405 } 2406 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 2407 if (flg) { 2408 Viewer tviewer; 2409 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2410 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO_LONG,0);CHKERRQ(ierr); 2411 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2412 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2413 } 2414 ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 2415 if (flg) { 2416 Viewer tviewer; 2417 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2418 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2419 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2420 } 2421 ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 2422 if (flg) { 2423 Viewer tviewer; 2424 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2425 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_MATLAB,"M");CHKERRQ(ierr); 2426 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2427 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2428 } 2429 ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 2430 if (flg) { 2431 Viewer tviewer; 2432 ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 2433 ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 2434 ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 2435 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2436 } 2437 return 0; 2438 } 2439 2440 2441 2442