1 #ifndef lint 2 static char vcid[] = "$Id: baij.c,v 1.103 1997/06/03 20:26:37 balay Exp bsmith $"; 3 #endif 4 5 /* 6 Defines the basic matrix operations for the BAIJ (compressed row) 7 matrix storage format. 8 */ 9 10 #include "pinclude/pviewer.h" 11 #include "sys.h" 12 #include "src/mat/impls/baij/seq/baij.h" 13 #include "src/vec/vecimpl.h" 14 #include "src/inline/spops.h" 15 #include "petsc.h" 16 17 18 /* 19 Adds diagonal pointers to sparse matrix structure. 20 */ 21 22 #undef __FUNC__ 23 #define __FUNC__ "MatMarkDiag_SeqBAIJ" 24 int MatMarkDiag_SeqBAIJ(Mat A) 25 { 26 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 27 int i,j, *diag, m = a->mbs; 28 29 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 30 PLogObjectMemory(A,(m+1)*sizeof(int)); 31 for ( i=0; i<m; i++ ) { 32 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 33 if (a->j[j] == i) { 34 diag[i] = j; 35 break; 36 } 37 } 38 } 39 a->diag = diag; 40 return 0; 41 } 42 43 44 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 45 46 #undef __FUNC__ 47 #define __FUNC__ "MatGetRowIJ_SeqBAIJ" 48 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 49 PetscTruth *done) 50 { 51 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 52 int ierr, n = a->mbs,i; 53 54 *nn = n; 55 if (!ia) return 0; 56 if (symmetric) { 57 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 58 } else if (oshift == 1) { 59 /* temporarily add 1 to i and j indices */ 60 int nz = a->i[n] + 1; 61 for ( i=0; i<nz; i++ ) a->j[i]++; 62 for ( i=0; i<n+1; i++ ) a->i[i]++; 63 *ia = a->i; *ja = a->j; 64 } else { 65 *ia = a->i; *ja = a->j; 66 } 67 68 return 0; 69 } 70 71 #undef __FUNC__ 72 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ" /* ADIC Ignore */ 73 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 74 PetscTruth *done) 75 { 76 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 77 int i,n = a->mbs; 78 79 if (!ia) return 0; 80 if (symmetric) { 81 PetscFree(*ia); 82 PetscFree(*ja); 83 } else if (oshift == 1) { 84 int nz = a->i[n]; 85 for ( i=0; i<nz; i++ ) a->j[i]--; 86 for ( i=0; i<n+1; i++ ) a->i[i]--; 87 } 88 return 0; 89 } 90 91 92 #undef __FUNC__ 93 #define __FUNC__ "MatView_SeqBAIJ_Binary" /* ADIC Ignore */ 94 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 95 { 96 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 97 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 98 Scalar *aa; 99 100 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 101 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 102 col_lens[0] = MAT_COOKIE; 103 104 col_lens[1] = a->m; 105 col_lens[2] = a->n; 106 col_lens[3] = a->nz*bs2; 107 108 /* store lengths of each row and write (including header) to file */ 109 count = 0; 110 for ( i=0; i<a->mbs; i++ ) { 111 for ( j=0; j<bs; j++ ) { 112 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 113 } 114 } 115 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 116 PetscFree(col_lens); 117 118 /* store column indices (zero start index) */ 119 jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) ); CHKPTRQ(jj); 120 count = 0; 121 for ( i=0; i<a->mbs; i++ ) { 122 for ( j=0; j<bs; j++ ) { 123 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 124 for ( l=0; l<bs; l++ ) { 125 jj[count++] = bs*a->j[k] + l; 126 } 127 } 128 } 129 } 130 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr); 131 PetscFree(jj); 132 133 /* store nonzero values */ 134 aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar)); CHKPTRQ(aa); 135 count = 0; 136 for ( i=0; i<a->mbs; i++ ) { 137 for ( j=0; j<bs; j++ ) { 138 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 139 for ( l=0; l<bs; l++ ) { 140 aa[count++] = a->a[bs2*k + l*bs + j]; 141 } 142 } 143 } 144 } 145 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 146 PetscFree(aa); 147 return 0; 148 } 149 150 #undef __FUNC__ 151 #define __FUNC__ "MatView_SeqBAIJ_ASCII" /* ADIC Ignore */ 152 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 153 { 154 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 155 int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 156 FILE *fd; 157 char *outputname; 158 159 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 160 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 161 ierr = ViewerGetFormat(viewer,&format); 162 if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) { 163 fprintf(fd," block size is %d\n",bs); 164 } 165 else if (format == VIEWER_FORMAT_ASCII_MATLAB) { 166 SETERRQ(1,0,"Matlab format not supported"); 167 } 168 else if (format == VIEWER_FORMAT_ASCII_COMMON) { 169 for ( i=0; i<a->mbs; i++ ) { 170 for ( j=0; j<bs; j++ ) { 171 fprintf(fd,"row %d:",i*bs+j); 172 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 173 for ( l=0; l<bs; l++ ) { 174 #if defined(PETSC_COMPLEX) 175 if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 176 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 177 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 178 else if (real(a->a[bs2*k + l*bs + j]) != 0.0) 179 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 180 #else 181 if (a->a[bs2*k + l*bs + j] != 0.0) 182 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 183 #endif 184 } 185 } 186 fprintf(fd,"\n"); 187 } 188 } 189 } 190 else { 191 for ( i=0; i<a->mbs; i++ ) { 192 for ( j=0; j<bs; j++ ) { 193 fprintf(fd,"row %d:",i*bs+j); 194 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 195 for ( l=0; l<bs; l++ ) { 196 #if defined(PETSC_COMPLEX) 197 if (imag(a->a[bs2*k + l*bs + j]) != 0.0) { 198 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 199 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 200 } 201 else { 202 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 203 } 204 #else 205 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 206 #endif 207 } 208 } 209 fprintf(fd,"\n"); 210 } 211 } 212 } 213 fflush(fd); 214 return 0; 215 } 216 217 #undef __FUNC__ 218 #define __FUNC__ "MatView_SeqBAIJ_Draw" /* ADIC Ignore */ 219 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer) 220 { 221 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 222 int row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2; 223 double xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r; 224 Scalar *aa; 225 Draw draw; 226 DrawButton button; 227 PetscTruth isnull; 228 229 ViewerDrawGetDraw(viewer,&draw); 230 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 231 232 xr = a->n; yr = a->m; h = yr/10.0; w = xr/10.0; 233 xr += w; yr += h; xl = -w; yl = -h; 234 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 235 /* loop over matrix elements drawing boxes */ 236 color = DRAW_BLUE; 237 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 238 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 239 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 240 x_l = a->j[j]*bs; x_r = x_l + 1.0; 241 aa = a->a + j*bs2; 242 for ( k=0; k<bs; k++ ) { 243 for ( l=0; l<bs; l++ ) { 244 if (PetscReal(*aa++) >= 0.) continue; 245 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 246 } 247 } 248 } 249 } 250 color = DRAW_CYAN; 251 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 252 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 253 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 254 x_l = a->j[j]*bs; x_r = x_l + 1.0; 255 aa = a->a + j*bs2; 256 for ( k=0; k<bs; k++ ) { 257 for ( l=0; l<bs; l++ ) { 258 if (PetscReal(*aa++) != 0.) continue; 259 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 260 } 261 } 262 } 263 } 264 265 color = DRAW_RED; 266 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 267 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 268 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 269 x_l = a->j[j]*bs; x_r = x_l + 1.0; 270 aa = a->a + j*bs2; 271 for ( k=0; k<bs; k++ ) { 272 for ( l=0; l<bs; l++ ) { 273 if (PetscReal(*aa++) <= 0.) continue; 274 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 275 } 276 } 277 } 278 } 279 280 DrawFlush(draw); 281 DrawGetPause(draw,&pause); 282 if (pause >= 0) { PetscSleep(pause); return 0;} 283 284 /* allow the matrix to zoom or shrink */ 285 ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 286 while (button != BUTTON_RIGHT) { 287 DrawClear(draw); 288 if (button == BUTTON_LEFT) scale = .5; 289 else if (button == BUTTON_CENTER) scale = 2.; 290 xl = scale*(xl + w - xc) + xc - w*scale; 291 xr = scale*(xr - w - xc) + xc + w*scale; 292 yl = scale*(yl + h - yc) + yc - h*scale; 293 yr = scale*(yr - h - yc) + yc + h*scale; 294 w *= scale; h *= scale; 295 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 296 color = DRAW_BLUE; 297 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 298 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 299 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 300 x_l = a->j[j]*bs; x_r = x_l + 1.0; 301 aa = a->a + j*bs2; 302 for ( k=0; k<bs; k++ ) { 303 for ( l=0; l<bs; l++ ) { 304 if (PetscReal(*aa++) >= 0.) continue; 305 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 306 } 307 } 308 } 309 } 310 color = DRAW_CYAN; 311 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 312 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 313 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 314 x_l = a->j[j]*bs; x_r = x_l + 1.0; 315 aa = a->a + j*bs2; 316 for ( k=0; k<bs; k++ ) { 317 for ( l=0; l<bs; l++ ) { 318 if (PetscReal(*aa++) != 0.) continue; 319 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 320 } 321 } 322 } 323 } 324 325 color = DRAW_RED; 326 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 327 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 328 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 329 x_l = a->j[j]*bs; x_r = x_l + 1.0; 330 aa = a->a + j*bs2; 331 for ( k=0; k<bs; k++ ) { 332 for ( l=0; l<bs; l++ ) { 333 if (PetscReal(*aa++) <= 0.) continue; 334 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 335 } 336 } 337 } 338 } 339 ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 340 } 341 return 0; 342 } 343 344 #undef __FUNC__ 345 #define __FUNC__ "MatView_SeqBAIJ" /* ADIC Ignore */ 346 int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 347 { 348 Mat A = (Mat) obj; 349 ViewerType vtype; 350 int ierr; 351 352 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 353 if (vtype == MATLAB_VIEWER) { 354 SETERRQ(1,0,"Matlab viewer not supported"); 355 } 356 else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 357 return MatView_SeqBAIJ_ASCII(A,viewer); 358 } 359 else if (vtype == BINARY_FILE_VIEWER) { 360 return MatView_SeqBAIJ_Binary(A,viewer); 361 } 362 else if (vtype == DRAW_VIEWER) { 363 return MatView_SeqBAIJ_Draw(A,viewer); 364 } 365 return 0; 366 } 367 368 #define CHUNKSIZE 10 369 370 #undef __FUNC__ 371 #define __FUNC__ "MatSetValues_SeqBAIJ" 372 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 373 { 374 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 375 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 376 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 377 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 378 int ridx,cidx,bs2=a->bs2; 379 Scalar *ap,value,*aa=a->a,*bap; 380 381 for ( k=0; k<m; k++ ) { /* loop over added rows */ 382 row = im[k]; brow = row/bs; 383 #if defined(PETSC_BOPT_g) 384 if (row < 0) SETERRQ(1,0,"Negative row"); 385 if (row >= a->m) SETERRQ(1,0,"Row too large"); 386 #endif 387 rp = aj + ai[brow]; 388 ap = aa + bs2*ai[brow]; 389 rmax = imax[brow]; 390 nrow = ailen[brow]; 391 low = 0; 392 for ( l=0; l<n; l++ ) { /* loop over added columns */ 393 #if defined(PETSC_BOPT_g) 394 if (in[l] < 0) SETERRQ(1,0,"Negative column"); 395 if (in[l] >= a->n) SETERRQ(1,0,"Column too large"); 396 #endif 397 col = in[l]; bcol = col/bs; 398 ridx = row % bs; cidx = col % bs; 399 if (roworiented) { 400 value = *v++; 401 } 402 else { 403 value = v[k + l*m]; 404 } 405 if (!sorted) low = 0; high = nrow; 406 while (high-low > 7) { 407 t = (low+high)/2; 408 if (rp[t] > bcol) high = t; 409 else low = t; 410 } 411 for ( i=low; i<high; i++ ) { 412 if (rp[i] > bcol) break; 413 if (rp[i] == bcol) { 414 bap = ap + bs2*i + bs*cidx + ridx; 415 if (is == ADD_VALUES) *bap += value; 416 else *bap = value; 417 goto noinsert1; 418 } 419 } 420 if (nonew == 1) goto noinsert1; 421 else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 422 if (nrow >= rmax) { 423 /* there is no extra room in row, therefore enlarge */ 424 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 425 Scalar *new_a; 426 427 if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 428 429 /* Malloc new storage space */ 430 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 431 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 432 new_j = (int *) (new_a + bs2*new_nz); 433 new_i = new_j + new_nz; 434 435 /* copy over old data into new slots */ 436 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 437 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 438 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); 439 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 440 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, 441 len*sizeof(int)); 442 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); 443 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 444 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), 445 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); 446 /* free up old matrix storage */ 447 PetscFree(a->a); 448 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 449 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 450 a->singlemalloc = 1; 451 452 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 453 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 454 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 455 a->maxnz += bs2*CHUNKSIZE; 456 a->reallocs++; 457 a->nz++; 458 } 459 N = nrow++ - 1; 460 /* shift up all the later entries in this row */ 461 for ( ii=N; ii>=i; ii-- ) { 462 rp[ii+1] = rp[ii]; 463 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 464 } 465 if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 466 rp[i] = bcol; 467 ap[bs2*i + bs*cidx + ridx] = value; 468 noinsert1:; 469 low = i; 470 } 471 ailen[brow] = nrow; 472 } 473 return 0; 474 } 475 476 #undef __FUNC__ 477 #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ" 478 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 479 { 480 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 481 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 482 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 483 int roworiented=a->roworiented; 484 int *aj=a->j,nonew=a->nonew; 485 int bs2=a->bs2,bs=a->bs,stepval; 486 Scalar *ap,*value=v,*aa=a->a,*bap; 487 488 if (roworiented) { 489 stepval = (n-1)*bs; 490 } else { 491 stepval = (m-1)*bs; 492 } 493 for ( k=0; k<m; k++ ) { /* loop over added rows */ 494 row = im[k]; 495 #if defined(PETSC_BOPT_g) 496 if (row < 0) SETERRQ(1,0,"Negative row"); 497 if (row >= a->mbs) SETERRQ(1,0,"Row too large"); 498 #endif 499 rp = aj + ai[row]; 500 ap = aa + bs2*ai[row]; 501 rmax = imax[row]; 502 nrow = ailen[row]; 503 low = 0; 504 for ( l=0; l<n; l++ ) { /* loop over added columns */ 505 #if defined(PETSC_BOPT_g) 506 if (in[l] < 0) SETERRQ(1,0,"Negative column"); 507 if (in[l] >= a->nbs) SETERRQ(1,0,"Column too large"); 508 #endif 509 col = in[l]; 510 if (roworiented) { 511 value = v + k*(stepval+bs)*bs + l*bs; 512 } else { 513 value = v + l*(stepval+bs)*bs + k*bs; 514 } 515 if (!sorted) low = 0; high = nrow; 516 while (high-low > 7) { 517 t = (low+high)/2; 518 if (rp[t] > col) high = t; 519 else low = t; 520 } 521 for ( i=low; i<high; i++ ) { 522 if (rp[i] > col) break; 523 if (rp[i] == col) { 524 bap = ap + bs2*i; 525 if (roworiented) { 526 if (is == ADD_VALUES) { 527 for ( ii=0; ii<bs; ii++,value+=stepval ) 528 for (jj=ii; jj<bs2; jj+=bs ) 529 bap[jj] += *value++; 530 } else { 531 for ( ii=0; ii<bs; ii++,value+=stepval ) 532 for (jj=ii; jj<bs2; jj+=bs ) 533 bap[jj] = *value++; 534 } 535 } else { 536 if (is == ADD_VALUES) { 537 for ( ii=0; ii<bs; ii++,value+=stepval ) 538 for (jj=0; jj<bs; jj++ ) 539 *bap++ += *value++; 540 } else { 541 for ( ii=0; ii<bs; ii++,value+=stepval ) 542 for (jj=0; jj<bs; jj++ ) 543 *bap++ = *value++; 544 } 545 } 546 goto noinsert2; 547 } 548 } 549 if (nonew == 1) goto noinsert2; 550 else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 551 if (nrow >= rmax) { 552 /* there is no extra room in row, therefore enlarge */ 553 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 554 Scalar *new_a; 555 556 if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 557 558 /* malloc new storage space */ 559 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 560 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 561 new_j = (int *) (new_a + bs2*new_nz); 562 new_i = new_j + new_nz; 563 564 /* copy over old data into new slots */ 565 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} 566 for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 567 PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int)); 568 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 569 PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow, 570 len*sizeof(int)); 571 PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(Scalar)); 572 PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 573 PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE), 574 aa+bs2*(ai[row]+nrow),bs2*len*sizeof(Scalar)); 575 /* free up old matrix storage */ 576 PetscFree(a->a); 577 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 578 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 579 a->singlemalloc = 1; 580 581 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 582 rmax = imax[row] = imax[row] + CHUNKSIZE; 583 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 584 a->maxnz += bs2*CHUNKSIZE; 585 a->reallocs++; 586 a->nz++; 587 } 588 N = nrow++ - 1; 589 /* shift up all the later entries in this row */ 590 for ( ii=N; ii>=i; ii-- ) { 591 rp[ii+1] = rp[ii]; 592 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 593 } 594 if (N >= i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 595 rp[i] = col; 596 bap = ap + bs2*i; 597 if (roworiented) { 598 for ( ii=0; ii<bs; ii++,value+=stepval) 599 for (jj=ii; jj<bs2; jj+=bs ) 600 bap[jj] = *value++; 601 } else { 602 for ( ii=0; ii<bs; ii++,value+=stepval ) 603 for (jj=0; jj<bs; jj++ ) 604 *bap++ = *value++; 605 } 606 noinsert2:; 607 low = i; 608 } 609 ailen[row] = nrow; 610 } 611 return 0; 612 } 613 614 #undef __FUNC__ 615 #define __FUNC__ "MatGetSize_SeqBAIJ" /* ADIC Ignore */ 616 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 617 { 618 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 619 *m = a->m; *n = a->n; 620 return 0; 621 } 622 623 #undef __FUNC__ 624 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ" /* ADIC Ignore */ 625 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 626 { 627 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 628 *m = 0; *n = a->m; 629 return 0; 630 } 631 632 #undef __FUNC__ 633 #define __FUNC__ "MatGetRow_SeqBAIJ" 634 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 635 { 636 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 637 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 638 Scalar *aa,*v_i,*aa_i; 639 640 bs = a->bs; 641 ai = a->i; 642 aj = a->j; 643 aa = a->a; 644 bs2 = a->bs2; 645 646 if (row < 0 || row >= a->m) SETERRQ(1,0,"Row out of range"); 647 648 bn = row/bs; /* Block number */ 649 bp = row % bs; /* Block Position */ 650 M = ai[bn+1] - ai[bn]; 651 *nz = bs*M; 652 653 if (v) { 654 *v = 0; 655 if (*nz) { 656 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 657 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 658 v_i = *v + i*bs; 659 aa_i = aa + bs2*(ai[bn] + i); 660 for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 661 } 662 } 663 } 664 665 if (idx) { 666 *idx = 0; 667 if (*nz) { 668 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 669 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 670 idx_i = *idx + i*bs; 671 itmp = bs*aj[ai[bn] + i]; 672 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 673 } 674 } 675 } 676 return 0; 677 } 678 679 #undef __FUNC__ 680 #define __FUNC__ "MatRestoreRow_SeqBAIJ" /* ADIC Ignore */ 681 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 682 { 683 if (idx) {if (*idx) PetscFree(*idx);} 684 if (v) {if (*v) PetscFree(*v);} 685 return 0; 686 } 687 688 #undef __FUNC__ 689 #define __FUNC__ "MatTranspose_SeqBAIJ" 690 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 691 { 692 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 693 Mat C; 694 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 695 int *rows,*cols,bs2=a->bs2; 696 Scalar *array=a->a; 697 698 if (B==PETSC_NULL && mbs!=nbs) 699 SETERRQ(1,0,"Square matrix only for in-place"); 700 col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 701 PetscMemzero(col,(1+nbs)*sizeof(int)); 702 703 for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 704 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 705 PetscFree(col); 706 rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 707 cols = rows + bs; 708 for ( i=0; i<mbs; i++ ) { 709 cols[0] = i*bs; 710 for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 711 len = ai[i+1] - ai[i]; 712 for ( j=0; j<len; j++ ) { 713 rows[0] = (*aj++)*bs; 714 for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 715 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 716 array += bs2; 717 } 718 } 719 PetscFree(rows); 720 721 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 722 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 723 724 if (B != PETSC_NULL) { 725 *B = C; 726 } else { 727 /* This isn't really an in-place transpose */ 728 PetscFree(a->a); 729 if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 730 if (a->diag) PetscFree(a->diag); 731 if (a->ilen) PetscFree(a->ilen); 732 if (a->imax) PetscFree(a->imax); 733 if (a->solve_work) PetscFree(a->solve_work); 734 PetscFree(a); 735 PetscMemcpy(A,C,sizeof(struct _p_Mat)); 736 PetscHeaderDestroy(C); 737 } 738 return 0; 739 } 740 741 742 #undef __FUNC__ 743 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ" 744 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 745 { 746 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 747 int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 748 int m = a->m,*ip, N, *ailen = a->ilen; 749 int mbs = a->mbs, bs2 = a->bs2,rmax; 750 Scalar *aa = a->a, *ap; 751 752 if (mode == MAT_FLUSH_ASSEMBLY) return 0; 753 754 rmax = ailen[0]; 755 for ( i=1; i<mbs; i++ ) { 756 /* move each row back by the amount of empty slots (fshift) before it*/ 757 fshift += imax[i-1] - ailen[i-1]; 758 rmax = PetscMax(rmax,ailen[i]); 759 if (fshift) { 760 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 761 N = ailen[i]; 762 for ( j=0; j<N; j++ ) { 763 ip[j-fshift] = ip[j]; 764 PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 765 } 766 } 767 ai[i] = ai[i-1] + ailen[i-1]; 768 } 769 if (mbs) { 770 fshift += imax[mbs-1] - ailen[mbs-1]; 771 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 772 } 773 /* reset ilen and imax for each row */ 774 for ( i=0; i<mbs; i++ ) { 775 ailen[i] = imax[i] = ai[i+1] - ai[i]; 776 } 777 a->nz = ai[mbs]; 778 779 /* diagonals may have moved, so kill the diagonal pointers */ 780 if (fshift && a->diag) { 781 PetscFree(a->diag); 782 PLogObjectMemory(A,-(m+1)*sizeof(int)); 783 a->diag = 0; 784 } 785 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 786 m,a->n,a->bs,fshift*bs2,a->nz*bs2); 787 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 788 a->reallocs); 789 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 790 a->reallocs = 0; 791 A->info.nz_unneeded = (double)fshift*bs2; 792 793 return 0; 794 } 795 796 #undef __FUNC__ 797 #define __FUNC__ "MatZeroEntries_SeqBAIJ" 798 int MatZeroEntries_SeqBAIJ(Mat A) 799 { 800 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 801 PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar)); 802 return 0; 803 } 804 805 #undef __FUNC__ 806 #define __FUNC__ "MatDestroy_SeqBAIJ" /* ADIC Ignore */ 807 int MatDestroy_SeqBAIJ(PetscObject obj) 808 { 809 Mat A = (Mat) obj; 810 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 811 812 #if defined(PETSC_LOG) 813 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 814 #endif 815 PetscFree(a->a); 816 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 817 if (a->diag) PetscFree(a->diag); 818 if (a->ilen) PetscFree(a->ilen); 819 if (a->imax) PetscFree(a->imax); 820 if (a->solve_work) PetscFree(a->solve_work); 821 if (a->mult_work) PetscFree(a->mult_work); 822 PetscFree(a); 823 PLogObjectDestroy(A); 824 PetscHeaderDestroy(A); 825 return 0; 826 } 827 828 #undef __FUNC__ 829 #define __FUNC__ "MatSetOption_SeqBAIJ" /* ADIC Ignore */ 830 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 831 { 832 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 833 if (op == MAT_ROW_ORIENTED) a->roworiented = 1; 834 else if (op == MAT_COLUMN_ORIENTED) a->roworiented = 0; 835 else if (op == MAT_COLUMNS_SORTED) a->sorted = 1; 836 else if (op == MAT_COLUMNS_UNSORTED) a->sorted = 0; 837 else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 838 else if (op == MAT_NEW_NONZERO_LOCATION_ERROR) a->nonew = -1; 839 else if (op == MAT_NEW_NONZERO_ALLOCATION_ERROR) a->nonew = -2; 840 else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 841 else if (op == MAT_ROWS_SORTED || 842 op == MAT_ROWS_UNSORTED || 843 op == MAT_SYMMETRIC || 844 op == MAT_STRUCTURALLY_SYMMETRIC || 845 op == MAT_YES_NEW_DIAGONALS || 846 op == MAT_IGNORE_OFF_PROC_ENTRIES) 847 PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 848 else if (op == MAT_NO_NEW_DIAGONALS) 849 {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");} 850 else 851 {SETERRQ(PETSC_ERR_SUP,0,"unknown option");} 852 return 0; 853 } 854 855 856 /* -------------------------------------------------------*/ 857 /* Should check that shapes of vectors and matrices match */ 858 /* -------------------------------------------------------*/ 859 #include "pinclude/plapack.h" 860 861 #undef __FUNC__ 862 #define __FUNC__ "MatMult_SeqBAIJ_1" 863 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz) 864 { 865 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 866 register Scalar *x,*z,*v,sum; 867 int mbs=a->mbs,i,*idx,*ii,n; 868 869 VecGetArray_Fast(xx,x); 870 VecGetArray_Fast(zz,z); 871 872 idx = a->j; 873 v = a->a; 874 ii = a->i; 875 876 for ( i=0; i<mbs; i++ ) { 877 n = ii[1] - ii[0]; ii++; 878 sum = 0.0; 879 while (n--) sum += *v++ * x[*idx++]; 880 z[i] = sum; 881 } 882 VecRestoreArray_Fast(xx,x); 883 VecRestoreArray_Fast(zz,z); 884 PLogFlops(2*a->nz - a->m); 885 return 0; 886 } 887 888 #undef __FUNC__ 889 #define __FUNC__ "MatMult_SeqBAIJ_2" 890 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz) 891 { 892 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 893 register Scalar *x,*z,*v,*xb,sum1,sum2; 894 register Scalar x1,x2; 895 int mbs=a->mbs,i,*idx,*ii; 896 int j,n; 897 898 VecGetArray_Fast(xx,x); 899 VecGetArray_Fast(zz,z); 900 901 idx = a->j; 902 v = a->a; 903 ii = a->i; 904 905 for ( i=0; i<mbs; i++ ) { 906 n = ii[1] - ii[0]; ii++; 907 sum1 = 0.0; sum2 = 0.0; 908 for ( j=0; j<n; j++ ) { 909 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 910 sum1 += v[0]*x1 + v[2]*x2; 911 sum2 += v[1]*x1 + v[3]*x2; 912 v += 4; 913 } 914 z[0] = sum1; z[1] = sum2; 915 z += 2; 916 } 917 VecRestoreArray_Fast(xx,x); 918 VecRestoreArray_Fast(zz,z); 919 PLogFlops(4*a->nz - a->m); 920 return 0; 921 } 922 923 #undef __FUNC__ 924 #define __FUNC__ "MatMult_SeqBAIJ_3" 925 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz) 926 { 927 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 928 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 929 int mbs=a->mbs,i,*idx,*ii,j,n; 930 931 VecGetArray_Fast(xx,x); 932 VecGetArray_Fast(zz,z); 933 934 idx = a->j; 935 v = a->a; 936 ii = a->i; 937 938 for ( i=0; i<mbs; i++ ) { 939 n = ii[1] - ii[0]; ii++; 940 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 941 for ( j=0; j<n; j++ ) { 942 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 943 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 944 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 945 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 946 v += 9; 947 } 948 z[0] = sum1; z[1] = sum2; z[2] = sum3; 949 z += 3; 950 } 951 VecRestoreArray_Fast(xx,x); 952 VecRestoreArray_Fast(zz,z); 953 PLogFlops(18*a->nz - a->m); 954 return 0; 955 } 956 957 #undef __FUNC__ 958 #define __FUNC__ "MatMult_SeqBAIJ_4" 959 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz) 960 { 961 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 962 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4; 963 register Scalar x1,x2,x3,x4; 964 int mbs=a->mbs,i,*idx,*ii; 965 int j,n; 966 967 VecGetArray_Fast(xx,x); 968 VecGetArray_Fast(zz,z); 969 970 idx = a->j; 971 v = a->a; 972 ii = a->i; 973 974 for ( i=0; i<mbs; i++ ) { 975 n = ii[1] - ii[0]; ii++; 976 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 977 for ( j=0; j<n; j++ ) { 978 xb = x + 4*(*idx++); 979 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 980 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 981 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 982 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 983 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 984 v += 16; 985 } 986 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 987 z += 4; 988 } 989 VecRestoreArray_Fast(xx,x); 990 VecRestoreArray_Fast(zz,z); 991 PLogFlops(32*a->nz - a->m); 992 return 0; 993 } 994 995 #undef __FUNC__ 996 #define __FUNC__ "MatMult_SeqBAIJ_5" 997 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz) 998 { 999 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1000 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 1001 register Scalar x1,x2,x3,x4,x5; 1002 int mbs=a->mbs,i,*idx,*ii,j,n; 1003 1004 VecGetArray_Fast(xx,x); 1005 VecGetArray_Fast(zz,z); 1006 1007 idx = a->j; 1008 v = a->a; 1009 ii = a->i; 1010 1011 for ( i=0; i<mbs; i++ ) { 1012 n = ii[1] - ii[0]; ii++; 1013 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 1014 for ( j=0; j<n; j++ ) { 1015 xb = x + 5*(*idx++); 1016 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 1017 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1018 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1019 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1020 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1021 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 1022 v += 25; 1023 } 1024 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 1025 z += 5; 1026 } 1027 VecRestoreArray_Fast(xx,x); 1028 VecRestoreArray_Fast(zz,z); 1029 PLogFlops(50*a->nz - a->m); 1030 return 0; 1031 } 1032 1033 #undef __FUNC__ 1034 #define __FUNC__ "MatMult_SeqBAIJ_7" 1035 static int MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz) 1036 { 1037 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1038 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 1039 register Scalar x1,x2,x3,x4,x5,x6,x7; 1040 int mbs=a->mbs,i,*idx,*ii,j,n; 1041 1042 VecGetArray_Fast(xx,x); 1043 VecGetArray_Fast(zz,z); 1044 1045 idx = a->j; 1046 v = a->a; 1047 ii = a->i; 1048 1049 for ( i=0; i<mbs; i++ ) { 1050 n = ii[1] - ii[0]; ii++; 1051 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0; 1052 for ( j=0; j<n; j++ ) { 1053 xb = x + 7*(*idx++); 1054 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 1055 sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 1056 sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 1057 sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 1058 sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 1059 sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 1060 sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 1061 sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 1062 v += 49; 1063 } 1064 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 1065 z += 7; 1066 } 1067 1068 VecRestoreArray_Fast(xx,x); 1069 VecRestoreArray_Fast(zz,z); 1070 PLogFlops(98*a->nz - a->m); 1071 return 0; 1072 } 1073 1074 #undef __FUNC__ 1075 #define __FUNC__ "MatMult_SeqBAIJ_N" 1076 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz) 1077 { 1078 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1079 register Scalar *x,*z,*v,*xb; 1080 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2; 1081 int _One = 1,ncols,k; 1082 Scalar _DOne = 1.0,*work,*workt,_DZero = 0.0; 1083 1084 VecGetArray_Fast(xx,x); 1085 VecGetArray_Fast(zz,z); 1086 1087 idx = a->j; 1088 v = a->a; 1089 ii = a->i; 1090 1091 1092 if (!a->mult_work) { 1093 k = PetscMax(a->m,a->n); 1094 a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work); 1095 } 1096 work = a->mult_work; 1097 for ( i=0; i<mbs; i++ ) { 1098 n = ii[1] - ii[0]; ii++; 1099 ncols = n*bs; 1100 workt = work; 1101 for ( j=0; j<n; j++ ) { 1102 xb = x + bs*(*idx++); 1103 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1104 workt += bs; 1105 } 1106 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); 1107 v += n*bs2; 1108 z += bs; 1109 } 1110 VecRestoreArray_Fast(xx,x); 1111 VecRestoreArray_Fast(zz,z); 1112 PLogFlops(2*a->nz*bs2 - a->m); 1113 return 0; 1114 } 1115 1116 #undef __FUNC__ 1117 #define __FUNC__ "MatMultAdd_SeqBAIJ_1" 1118 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 1119 { 1120 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1121 register Scalar *x,*y,*z,*v,sum; 1122 int mbs=a->mbs,i,*idx,*ii,n; 1123 1124 VecGetArray_Fast(xx,x); 1125 VecGetArray_Fast(yy,y); 1126 VecGetArray_Fast(zz,z); 1127 1128 idx = a->j; 1129 v = a->a; 1130 ii = a->i; 1131 1132 for ( i=0; i<mbs; i++ ) { 1133 n = ii[1] - ii[0]; ii++; 1134 sum = y[i]; 1135 while (n--) sum += *v++ * x[*idx++]; 1136 z[i] = sum; 1137 } 1138 VecRestoreArray_Fast(xx,x); 1139 VecRestoreArray_Fast(yy,y); 1140 VecRestoreArray_Fast(zz,z); 1141 PLogFlops(2*a->nz); 1142 return 0; 1143 } 1144 1145 #undef __FUNC__ 1146 #define __FUNC__ "MatMultAdd_SeqBAIJ_2" 1147 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 1148 { 1149 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1150 register Scalar *x,*y,*z,*v,*xb,sum1,sum2; 1151 register Scalar x1,x2; 1152 int mbs=a->mbs,i,*idx,*ii; 1153 int j,n; 1154 1155 VecGetArray_Fast(xx,x); 1156 VecGetArray_Fast(yy,y); 1157 VecGetArray_Fast(zz,z); 1158 1159 idx = a->j; 1160 v = a->a; 1161 ii = a->i; 1162 1163 for ( i=0; i<mbs; i++ ) { 1164 n = ii[1] - ii[0]; ii++; 1165 sum1 = y[0]; sum2 = y[1]; 1166 for ( j=0; j<n; j++ ) { 1167 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 1168 sum1 += v[0]*x1 + v[2]*x2; 1169 sum2 += v[1]*x1 + v[3]*x2; 1170 v += 4; 1171 } 1172 z[0] = sum1; z[1] = sum2; 1173 z += 2; y += 2; 1174 } 1175 VecRestoreArray_Fast(xx,x); 1176 VecRestoreArray_Fast(yy,y); 1177 VecRestoreArray_Fast(zz,z); 1178 PLogFlops(4*a->nz); 1179 return 0; 1180 } 1181 1182 #undef __FUNC__ 1183 #define __FUNC__ "MatMultAdd_SeqBAIJ_3" 1184 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 1185 { 1186 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1187 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 1188 int mbs=a->mbs,i,*idx,*ii,j,n; 1189 1190 VecGetArray_Fast(xx,x); 1191 VecGetArray_Fast(yy,y); 1192 VecGetArray_Fast(zz,z); 1193 1194 idx = a->j; 1195 v = a->a; 1196 ii = a->i; 1197 1198 for ( i=0; i<mbs; i++ ) { 1199 n = ii[1] - ii[0]; ii++; 1200 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; 1201 for ( j=0; j<n; j++ ) { 1202 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1203 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 1204 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 1205 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 1206 v += 9; 1207 } 1208 z[0] = sum1; z[1] = sum2; z[2] = sum3; 1209 z += 3; y += 3; 1210 } 1211 VecRestoreArray_Fast(xx,x); 1212 VecRestoreArray_Fast(yy,y); 1213 VecRestoreArray_Fast(zz,z); 1214 PLogFlops(18*a->nz); 1215 return 0; 1216 } 1217 1218 #undef __FUNC__ 1219 #define __FUNC__ "MatMultAdd_SeqBAIJ_4" 1220 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 1221 { 1222 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1223 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4; 1224 register Scalar x1,x2,x3,x4; 1225 int mbs=a->mbs,i,*idx,*ii; 1226 int j,n; 1227 1228 VecGetArray_Fast(xx,x); 1229 VecGetArray_Fast(yy,y); 1230 VecGetArray_Fast(zz,z); 1231 1232 idx = a->j; 1233 v = a->a; 1234 ii = a->i; 1235 1236 for ( i=0; i<mbs; i++ ) { 1237 n = ii[1] - ii[0]; ii++; 1238 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; 1239 for ( j=0; j<n; j++ ) { 1240 xb = x + 4*(*idx++); 1241 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1242 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 1243 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 1244 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 1245 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 1246 v += 16; 1247 } 1248 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 1249 z += 4; y += 4; 1250 } 1251 VecRestoreArray_Fast(xx,x); 1252 VecRestoreArray_Fast(yy,y); 1253 VecRestoreArray_Fast(zz,z); 1254 PLogFlops(32*a->nz); 1255 return 0; 1256 } 1257 1258 #undef __FUNC__ 1259 #define __FUNC__ "MatMultAdd_SeqBAIJ_5" 1260 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 1261 { 1262 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1263 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 1264 register Scalar x1,x2,x3,x4,x5; 1265 int mbs=a->mbs,i,*idx,*ii,j,n; 1266 1267 VecGetArray_Fast(xx,x); 1268 VecGetArray_Fast(yy,y); 1269 VecGetArray_Fast(zz,z); 1270 1271 idx = a->j; 1272 v = a->a; 1273 ii = a->i; 1274 1275 for ( i=0; i<mbs; i++ ) { 1276 n = ii[1] - ii[0]; ii++; 1277 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; 1278 for ( j=0; j<n; j++ ) { 1279 xb = x + 5*(*idx++); 1280 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 1281 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1282 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1283 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1284 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1285 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 1286 v += 25; 1287 } 1288 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 1289 z += 5; y += 5; 1290 } 1291 VecRestoreArray_Fast(xx,x); 1292 VecRestoreArray_Fast(yy,y); 1293 VecRestoreArray_Fast(zz,z); 1294 PLogFlops(50*a->nz); 1295 return 0; 1296 } 1297 1298 #undef __FUNC__ 1299 #define __FUNC__ "MatMultAdd_SeqBAIJ_7" 1300 static int MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1301 { 1302 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1303 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 1304 register Scalar x1,x2,x3,x4,x5,x6,x7; 1305 int mbs=a->mbs,i,*idx,*ii,j,n; 1306 1307 VecGetArray_Fast(xx,x); 1308 VecGetArray_Fast(yy,y); 1309 VecGetArray_Fast(zz,z); 1310 1311 idx = a->j; 1312 v = a->a; 1313 ii = a->i; 1314 1315 for ( i=0; i<mbs; i++ ) { 1316 n = ii[1] - ii[0]; ii++; 1317 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6]; 1318 for ( j=0; j<n; j++ ) { 1319 xb = x + 7*(*idx++); 1320 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 1321 sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 1322 sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 1323 sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 1324 sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 1325 sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 1326 sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 1327 sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 1328 v += 49; 1329 } 1330 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 1331 z += 7; y += 7; 1332 } 1333 VecRestoreArray_Fast(xx,x); 1334 VecRestoreArray_Fast(yy,y); 1335 VecRestoreArray_Fast(zz,z); 1336 PLogFlops(98*a->nz); 1337 return 0; 1338 } 1339 1340 1341 #undef __FUNC__ 1342 #define __FUNC__ "MatMultAdd_SeqBAIJ_N" 1343 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 1344 { 1345 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1346 register Scalar *x,*z,*v,*xb; 1347 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr; 1348 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1349 1350 if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1351 1352 VecGetArray_Fast(xx,x); 1353 VecGetArray_Fast(zz,z); 1354 1355 idx = a->j; 1356 v = a->a; 1357 ii = a->i; 1358 1359 1360 if (!a->mult_work) { 1361 k = PetscMax(a->m,a->n); 1362 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work); 1363 } 1364 work = a->mult_work; 1365 for ( i=0; i<mbs; i++ ) { 1366 n = ii[1] - ii[0]; ii++; 1367 ncols = n*bs; 1368 workt = work; 1369 for ( j=0; j<n; j++ ) { 1370 xb = x + bs*(*idx++); 1371 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1372 workt += bs; 1373 } 1374 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); 1375 v += n*bs2; 1376 z += bs; 1377 } 1378 VecRestoreArray_Fast(xx,x); 1379 VecRestoreArray_Fast(zz,z); 1380 PLogFlops(2*a->nz*bs2 ); 1381 return 0; 1382 } 1383 1384 #undef __FUNC__ 1385 #define __FUNC__ "MatMultTrans_SeqBAIJ" 1386 int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz) 1387 { 1388 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1389 Scalar *xg,*zg,*zb; 1390 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1391 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1392 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1393 1394 1395 VecGetArray_Fast(xx,xg); x = xg; 1396 VecGetArray_Fast(zz,zg); z = zg; 1397 PetscMemzero(z,N*sizeof(Scalar)); 1398 1399 idx = a->j; 1400 v = a->a; 1401 ii = a->i; 1402 1403 switch (bs) { 1404 case 1: 1405 for ( i=0; i<mbs; i++ ) { 1406 n = ii[1] - ii[0]; ii++; 1407 xb = x + i; x1 = xb[0]; 1408 ib = idx + ai[i]; 1409 for ( j=0; j<n; j++ ) { 1410 rval = ib[j]; 1411 z[rval] += *v++ * x1; 1412 } 1413 } 1414 break; 1415 case 2: 1416 for ( i=0; i<mbs; i++ ) { 1417 n = ii[1] - ii[0]; ii++; 1418 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1419 ib = idx + ai[i]; 1420 for ( j=0; j<n; j++ ) { 1421 rval = ib[j]*2; 1422 z[rval++] += v[0]*x1 + v[1]*x2; 1423 z[rval++] += v[2]*x1 + v[3]*x2; 1424 v += 4; 1425 } 1426 } 1427 break; 1428 case 3: 1429 for ( i=0; i<mbs; i++ ) { 1430 n = ii[1] - ii[0]; ii++; 1431 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1432 ib = idx + ai[i]; 1433 for ( j=0; j<n; j++ ) { 1434 rval = ib[j]*3; 1435 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1436 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1437 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1438 v += 9; 1439 } 1440 } 1441 break; 1442 case 4: 1443 for ( i=0; i<mbs; i++ ) { 1444 n = ii[1] - ii[0]; ii++; 1445 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1446 ib = idx + ai[i]; 1447 for ( j=0; j<n; j++ ) { 1448 rval = ib[j]*4; 1449 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1450 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1451 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1452 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1453 v += 16; 1454 } 1455 } 1456 break; 1457 case 5: 1458 for ( i=0; i<mbs; i++ ) { 1459 n = ii[1] - ii[0]; ii++; 1460 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1461 x4 = xb[3]; x5 = xb[4]; 1462 ib = idx + ai[i]; 1463 for ( j=0; j<n; j++ ) { 1464 rval = ib[j]*5; 1465 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1466 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1467 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1468 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1469 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1470 v += 25; 1471 } 1472 } 1473 break; 1474 /* block sizes larger then 5 by 5 are handled by BLAS */ 1475 default: { 1476 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1477 if (!a->mult_work) { 1478 k = PetscMax(a->m,a->n); 1479 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1480 CHKPTRQ(a->mult_work); 1481 } 1482 work = a->mult_work; 1483 for ( i=0; i<mbs; i++ ) { 1484 n = ii[1] - ii[0]; ii++; 1485 ncols = n*bs; 1486 PetscMemzero(work,ncols*sizeof(Scalar)); 1487 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1488 v += n*bs2; 1489 x += bs; 1490 workt = work; 1491 for ( j=0; j<n; j++ ) { 1492 zb = z + bs*(*idx++); 1493 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1494 workt += bs; 1495 } 1496 } 1497 } 1498 } 1499 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1500 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1501 PLogFlops(2*a->nz*a->bs2 - a->n); 1502 return 0; 1503 } 1504 1505 #undef __FUNC__ 1506 #define __FUNC__ "MatMultTransAdd_SeqBAIJ" 1507 int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1508 { 1509 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1510 Scalar *xg,*zg,*zb; 1511 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1512 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1513 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1514 1515 1516 1517 VecGetArray_Fast(xx,xg); x = xg; 1518 VecGetArray_Fast(zz,zg); z = zg; 1519 1520 if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1521 else PetscMemzero(z,N*sizeof(Scalar)); 1522 1523 1524 idx = a->j; 1525 v = a->a; 1526 ii = a->i; 1527 1528 switch (bs) { 1529 case 1: 1530 for ( i=0; i<mbs; i++ ) { 1531 n = ii[1] - ii[0]; ii++; 1532 xb = x + i; x1 = xb[0]; 1533 ib = idx + ai[i]; 1534 for ( j=0; j<n; j++ ) { 1535 rval = ib[j]; 1536 z[rval] += *v++ * x1; 1537 } 1538 } 1539 break; 1540 case 2: 1541 for ( i=0; i<mbs; i++ ) { 1542 n = ii[1] - ii[0]; ii++; 1543 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1544 ib = idx + ai[i]; 1545 for ( j=0; j<n; j++ ) { 1546 rval = ib[j]*2; 1547 z[rval++] += v[0]*x1 + v[1]*x2; 1548 z[rval++] += v[2]*x1 + v[3]*x2; 1549 v += 4; 1550 } 1551 } 1552 break; 1553 case 3: 1554 for ( i=0; i<mbs; i++ ) { 1555 n = ii[1] - ii[0]; ii++; 1556 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1557 ib = idx + ai[i]; 1558 for ( j=0; j<n; j++ ) { 1559 rval = ib[j]*3; 1560 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1561 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1562 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1563 v += 9; 1564 } 1565 } 1566 break; 1567 case 4: 1568 for ( i=0; i<mbs; i++ ) { 1569 n = ii[1] - ii[0]; ii++; 1570 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1571 ib = idx + ai[i]; 1572 for ( j=0; j<n; j++ ) { 1573 rval = ib[j]*4; 1574 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1575 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1576 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1577 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1578 v += 16; 1579 } 1580 } 1581 break; 1582 case 5: 1583 for ( i=0; i<mbs; i++ ) { 1584 n = ii[1] - ii[0]; ii++; 1585 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1586 x4 = xb[3]; x5 = xb[4]; 1587 ib = idx + ai[i]; 1588 for ( j=0; j<n; j++ ) { 1589 rval = ib[j]*5; 1590 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1591 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1592 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1593 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1594 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1595 v += 25; 1596 } 1597 } 1598 break; 1599 /* block sizes larger then 5 by 5 are handled by BLAS */ 1600 default: { 1601 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1602 if (!a->mult_work) { 1603 k = PetscMax(a->m,a->n); 1604 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1605 CHKPTRQ(a->mult_work); 1606 } 1607 work = a->mult_work; 1608 for ( i=0; i<mbs; i++ ) { 1609 n = ii[1] - ii[0]; ii++; 1610 ncols = n*bs; 1611 PetscMemzero(work,ncols*sizeof(Scalar)); 1612 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1613 v += n*bs2; 1614 x += bs; 1615 workt = work; 1616 for ( j=0; j<n; j++ ) { 1617 zb = z + bs*(*idx++); 1618 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1619 workt += bs; 1620 } 1621 } 1622 } 1623 } 1624 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1625 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1626 PLogFlops(2*a->nz*a->bs2); 1627 return 0; 1628 } 1629 1630 #undef __FUNC__ 1631 #define __FUNC__ "MatGetInfo_SeqBAIJ" /* ADIC Ignore */ 1632 int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info) 1633 { 1634 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1635 1636 info->rows_global = (double)a->m; 1637 info->columns_global = (double)a->n; 1638 info->rows_local = (double)a->m; 1639 info->columns_local = (double)a->n; 1640 info->block_size = a->bs2; 1641 info->nz_allocated = a->maxnz; 1642 info->nz_used = a->bs2*a->nz; 1643 info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 1644 /* if (info->nz_unneeded != A->info.nz_unneeded) 1645 printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */ 1646 info->assemblies = A->num_ass; 1647 info->mallocs = a->reallocs; 1648 info->memory = A->mem; 1649 if (A->factor) { 1650 info->fill_ratio_given = A->info.fill_ratio_given; 1651 info->fill_ratio_needed = A->info.fill_ratio_needed; 1652 info->factor_mallocs = A->info.factor_mallocs; 1653 } else { 1654 info->fill_ratio_given = 0; 1655 info->fill_ratio_needed = 0; 1656 info->factor_mallocs = 0; 1657 } 1658 return 0; 1659 } 1660 1661 #undef __FUNC__ 1662 #define __FUNC__ "MatEqual_SeqBAIJ" 1663 int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 1664 { 1665 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 1666 1667 if (B->type !=MATSEQBAIJ)SETERRQ(1,0,"Matrices must be same type"); 1668 1669 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1670 if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 1671 (a->nz != b->nz)) { 1672 *flg = PETSC_FALSE; return 0; 1673 } 1674 1675 /* if the a->i are the same */ 1676 if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 1677 *flg = PETSC_FALSE; return 0; 1678 } 1679 1680 /* if a->j are the same */ 1681 if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 1682 *flg = PETSC_FALSE; return 0; 1683 } 1684 1685 /* if a->a are the same */ 1686 if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 1687 *flg = PETSC_FALSE; return 0; 1688 } 1689 *flg = PETSC_TRUE; 1690 return 0; 1691 1692 } 1693 1694 #undef __FUNC__ 1695 #define __FUNC__ "MatGetDiagonal_SeqBAIJ" 1696 int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 1697 { 1698 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1699 int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 1700 Scalar *x, zero = 0.0,*aa,*aa_j; 1701 1702 if (A->factor) SETERRQ(1,0,"Not for factored matrix"); 1703 bs = a->bs; 1704 aa = a->a; 1705 ai = a->i; 1706 aj = a->j; 1707 ambs = a->mbs; 1708 bs2 = a->bs2; 1709 1710 VecSet(&zero,v); 1711 VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n); 1712 if (n != a->m) SETERRQ(1,0,"Nonconforming matrix and vector"); 1713 for ( i=0; i<ambs; i++ ) { 1714 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1715 if (aj[j] == i) { 1716 row = i*bs; 1717 aa_j = aa+j*bs2; 1718 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 1719 break; 1720 } 1721 } 1722 } 1723 return 0; 1724 } 1725 1726 #undef __FUNC__ 1727 #define __FUNC__ "MatDiagonalScale_SeqBAIJ" 1728 int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 1729 { 1730 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1731 Scalar *l,*r,x,*v,*aa,*li,*ri; 1732 int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 1733 1734 ai = a->i; 1735 aj = a->j; 1736 aa = a->a; 1737 m = a->m; 1738 n = a->n; 1739 bs = a->bs; 1740 mbs = a->mbs; 1741 bs2 = a->bs2; 1742 if (ll) { 1743 VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm); 1744 if (lm != m) SETERRQ(1,0,"Left scaling vector wrong length"); 1745 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1746 M = ai[i+1] - ai[i]; 1747 li = l + i*bs; 1748 v = aa + bs2*ai[i]; 1749 for ( j=0; j<M; j++ ) { /* for each block */ 1750 for ( k=0; k<bs2; k++ ) { 1751 (*v++) *= li[k%bs]; 1752 } 1753 } 1754 } 1755 } 1756 1757 if (rr) { 1758 VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn); 1759 if (rn != n) SETERRQ(1,0,"Right scaling vector wrong length"); 1760 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1761 M = ai[i+1] - ai[i]; 1762 v = aa + bs2*ai[i]; 1763 for ( j=0; j<M; j++ ) { /* for each block */ 1764 ri = r + bs*aj[ai[i]+j]; 1765 for ( k=0; k<bs; k++ ) { 1766 x = ri[k]; 1767 for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 1768 } 1769 } 1770 } 1771 } 1772 return 0; 1773 } 1774 1775 1776 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 1777 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 1778 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1779 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*); 1780 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**); 1781 1782 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 1783 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 1784 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 1785 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 1786 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 1787 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 1788 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec); 1789 1790 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 1791 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 1792 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 1793 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 1794 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 1795 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 1796 1797 #undef __FUNC__ 1798 #define __FUNC__ "MatNorm_SeqBAIJ" 1799 int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 1800 { 1801 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1802 Scalar *v = a->a; 1803 double sum = 0.0; 1804 int i,nz=a->nz,bs2=a->bs2; 1805 1806 if (type == NORM_FROBENIUS) { 1807 for (i=0; i< bs2*nz; i++ ) { 1808 #if defined(PETSC_COMPLEX) 1809 sum += real(conj(*v)*(*v)); v++; 1810 #else 1811 sum += (*v)*(*v); v++; 1812 #endif 1813 } 1814 *norm = sqrt(sum); 1815 } 1816 else { 1817 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 1818 } 1819 return 0; 1820 } 1821 1822 /* 1823 note: This can only work for identity for row and col. It would 1824 be good to check this and otherwise generate an error. 1825 */ 1826 #undef __FUNC__ 1827 #define __FUNC__ "MatILUFactor_SeqBAIJ" 1828 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 1829 { 1830 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1831 Mat outA; 1832 int ierr; 1833 1834 if (fill != 0) SETERRQ(1,0,"Only fill=0 supported"); 1835 1836 outA = inA; 1837 inA->factor = FACTOR_LU; 1838 a->row = row; 1839 a->col = col; 1840 1841 if (!a->solve_work) { 1842 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1843 PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar)); 1844 } 1845 1846 if (!a->diag) { 1847 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 1848 } 1849 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 1850 return 0; 1851 } 1852 1853 #undef __FUNC__ 1854 #define __FUNC__ "MatScale_SeqBAIJ" 1855 int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 1856 { 1857 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1858 int one = 1, totalnz = a->bs2*a->nz; 1859 BLscal_( &totalnz, alpha, a->a, &one ); 1860 PLogFlops(totalnz); 1861 return 0; 1862 } 1863 1864 #undef __FUNC__ 1865 #define __FUNC__ "MatGetValues_SeqBAIJ" 1866 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 1867 { 1868 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1869 int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 1870 int *ai = a->i, *ailen = a->ilen; 1871 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 1872 Scalar *ap, *aa = a->a, zero = 0.0; 1873 1874 for ( k=0; k<m; k++ ) { /* loop over rows */ 1875 row = im[k]; brow = row/bs; 1876 if (row < 0) SETERRQ(1,0,"Negative row"); 1877 if (row >= a->m) SETERRQ(1,0,"Row too large"); 1878 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1879 nrow = ailen[brow]; 1880 for ( l=0; l<n; l++ ) { /* loop over columns */ 1881 if (in[l] < 0) SETERRQ(1,0,"Negative column"); 1882 if (in[l] >= a->n) SETERRQ(1,0,"Column too large"); 1883 col = in[l] ; 1884 bcol = col/bs; 1885 cidx = col%bs; 1886 ridx = row%bs; 1887 high = nrow; 1888 low = 0; /* assume unsorted */ 1889 while (high-low > 5) { 1890 t = (low+high)/2; 1891 if (rp[t] > bcol) high = t; 1892 else low = t; 1893 } 1894 for ( i=low; i<high; i++ ) { 1895 if (rp[i] > bcol) break; 1896 if (rp[i] == bcol) { 1897 *v++ = ap[bs2*i+bs*cidx+ridx]; 1898 goto finished; 1899 } 1900 } 1901 *v++ = zero; 1902 finished:; 1903 } 1904 } 1905 return 0; 1906 } 1907 1908 #undef __FUNC__ 1909 #define __FUNC__ "MatGetBlockSize_SeqBAIJ" /* ADIC Ignore */ 1910 int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs) 1911 { 1912 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data; 1913 *bs = baij->bs; 1914 return 0; 1915 } 1916 1917 /* idx should be of length atlease bs */ 1918 #undef __FUNC__ 1919 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Block" 1920 static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg) 1921 { 1922 int i,row; 1923 row = idx[0]; 1924 if (row%bs!=0) { *flg = PETSC_FALSE; return 0; } 1925 1926 for ( i=1; i<bs; i++ ) { 1927 if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; } 1928 } 1929 *flg = PETSC_TRUE; 1930 return 0; 1931 } 1932 1933 #undef __FUNC__ 1934 #define __FUNC__ "MatZeroRows_SeqBAIJ" 1935 int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag) 1936 { 1937 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1938 IS is_local; 1939 int ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2; 1940 PetscTruth flg; 1941 Scalar zero = 0.0,*aa; 1942 1943 /* Make a copy of the IS and sort it */ 1944 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 1945 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1946 ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr); 1947 ierr = ISSort(is_local); CHKERRQ(ierr); 1948 ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr); 1949 1950 i = 0; 1951 while (i < is_n) { 1952 if (rows[i]<0 || rows[i]>m) SETERRQ(1,0,"row out of range"); 1953 flg = PETSC_FALSE; 1954 if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); } 1955 if (flg) { /* There exists a block of rows to be Zerowed */ 1956 baij->ilen[rows[i]/bs] = 0; 1957 i += bs; 1958 } else { /* Zero out only the requested row */ 1959 count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs; 1960 aa = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs); 1961 for ( j=0; j<count; j++ ) { 1962 aa[0] = zero; 1963 aa+=bs; 1964 } 1965 i++; 1966 } 1967 } 1968 if (diag) { 1969 for ( j=0; j<is_n; j++ ) { 1970 ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1971 } 1972 } 1973 ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr); 1974 ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr); 1975 ierr = ISDestroy(is_local); CHKERRQ(ierr); 1976 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1977 1978 return 0; 1979 } 1980 1981 #undef __FUNC__ 1982 #define __FUNC__ "MatPrintHelp_SeqBAIJ" /* ADIC Ignore */ 1983 int MatPrintHelp_SeqBAIJ(Mat A) 1984 { 1985 static int called = 0; 1986 MPI_Comm comm = A->comm; 1987 1988 if (called) return 0; else called = 1; 1989 PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n"); 1990 PetscPrintf(comm," -mat_block_size <block_size>\n"); 1991 return 0; 1992 } 1993 1994 /* -------------------------------------------------------------------*/ 1995 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 1996 MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 1997 MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N, 1998 MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ, 1999 MatSolve_SeqBAIJ_N,0, 2000 0,0, 2001 MatLUFactor_SeqBAIJ,0, 2002 0, 2003 MatTranspose_SeqBAIJ, 2004 MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 2005 MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 2006 0,MatAssemblyEnd_SeqBAIJ, 2007 0, 2008 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ, 2009 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 2010 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 2011 MatILUFactorSymbolic_SeqBAIJ,0, 2012 0,0, 2013 MatConvertSameType_SeqBAIJ,0,0, 2014 MatILUFactor_SeqBAIJ,0,0, 2015 MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ, 2016 MatGetValues_SeqBAIJ,0, 2017 MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 2018 0,0,0,MatGetBlockSize_SeqBAIJ, 2019 MatGetRowIJ_SeqBAIJ, 2020 MatRestoreRowIJ_SeqBAIJ, 2021 0,0,0,0,0,0, 2022 MatSetValuesBlocked_SeqBAIJ}; 2023 2024 #undef __FUNC__ 2025 #define __FUNC__ "MatCreateSeqBAIJ" 2026 /*@C 2027 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2028 compressed row) format. For good matrix assembly performance the 2029 user should preallocate the matrix storage by setting the parameter nz 2030 (or the array nzz). By setting these parameters accurately, performance 2031 during matrix assembly can be increased by more than a factor of 50. 2032 2033 Input Parameters: 2034 . comm - MPI communicator, set to PETSC_COMM_SELF 2035 . bs - size of block 2036 . m - number of rows 2037 . n - number of columns 2038 . nz - number of block nonzeros per block row (same for all rows) 2039 . nzz - array containing the number of block nonzeros in the various block rows 2040 (possibly different for each block row) or PETSC_NULL 2041 2042 Output Parameter: 2043 . A - the matrix 2044 2045 Options Database Keys: 2046 $ -mat_no_unroll - uses code that does not unroll the loops in the 2047 $ block calculations (much solver) 2048 $ -mat_block_size - size of the blocks to use 2049 2050 Notes: 2051 The block AIJ format is fully compatible with standard Fortran 77 2052 storage. That is, the stored row and column indices can begin at 2053 either one (as in Fortran) or zero. See the users' manual for details. 2054 2055 Specify the preallocated storage with either nz or nnz (not both). 2056 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2057 allocation. For additional details, see the users manual chapter on 2058 matrices. 2059 2060 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2061 @*/ 2062 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 2063 { 2064 Mat B; 2065 Mat_SeqBAIJ *b; 2066 int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size; 2067 2068 MPI_Comm_size(comm,&size); 2069 if (size > 1) SETERRQ(1,0,"Comm must be of size 1"); 2070 2071 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr); 2072 2073 if (mbs*bs!=m || nbs*bs!=n) 2074 SETERRQ(1,0,"Number rows, cols must be divisible by blocksize"); 2075 2076 *A = 0; 2077 PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 2078 PLogObjectCreate(B); 2079 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 2080 PetscMemzero(b,sizeof(Mat_SeqBAIJ)); 2081 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 2082 ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr); 2083 if (!flg) { 2084 switch (bs) { 2085 case 1: 2086 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2087 B->ops.solve = MatSolve_SeqBAIJ_1; 2088 B->ops.mult = MatMult_SeqBAIJ_1; 2089 B->ops.multadd = MatMultAdd_SeqBAIJ_1; 2090 break; 2091 case 2: 2092 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2093 B->ops.solve = MatSolve_SeqBAIJ_2; 2094 B->ops.mult = MatMult_SeqBAIJ_2; 2095 B->ops.multadd = MatMultAdd_SeqBAIJ_2; 2096 break; 2097 case 3: 2098 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2099 B->ops.solve = MatSolve_SeqBAIJ_3; 2100 B->ops.mult = MatMult_SeqBAIJ_3; 2101 B->ops.multadd = MatMultAdd_SeqBAIJ_3; 2102 break; 2103 case 4: 2104 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2105 B->ops.solve = MatSolve_SeqBAIJ_4; 2106 B->ops.mult = MatMult_SeqBAIJ_4; 2107 B->ops.multadd = MatMultAdd_SeqBAIJ_4; 2108 break; 2109 case 5: 2110 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2111 B->ops.solve = MatSolve_SeqBAIJ_5; 2112 B->ops.mult = MatMult_SeqBAIJ_5; 2113 B->ops.multadd = MatMultAdd_SeqBAIJ_5; 2114 break; 2115 case 7: 2116 B->ops.mult = MatMult_SeqBAIJ_7; 2117 B->ops.solve = MatSolve_SeqBAIJ_7; 2118 B->ops.multadd = MatMultAdd_SeqBAIJ_7; 2119 break; 2120 } 2121 } 2122 B->destroy = MatDestroy_SeqBAIJ; 2123 B->view = MatView_SeqBAIJ; 2124 B->factor = 0; 2125 B->lupivotthreshold = 1.0; 2126 B->mapping = 0; 2127 b->row = 0; 2128 b->col = 0; 2129 b->reallocs = 0; 2130 2131 b->m = m; B->m = m; B->M = m; 2132 b->n = n; B->n = n; B->N = n; 2133 b->mbs = mbs; 2134 b->nbs = nbs; 2135 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 2136 if (nnz == PETSC_NULL) { 2137 if (nz == PETSC_DEFAULT) nz = 5; 2138 else if (nz <= 0) nz = 1; 2139 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 2140 nz = nz*mbs; 2141 } 2142 else { 2143 nz = 0; 2144 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 2145 } 2146 2147 /* allocate the matrix space */ 2148 len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 2149 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 2150 PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 2151 b->j = (int *) (b->a + nz*bs2); 2152 PetscMemzero(b->j,nz*sizeof(int)); 2153 b->i = b->j + nz; 2154 b->singlemalloc = 1; 2155 2156 b->i[0] = 0; 2157 for (i=1; i<mbs+1; i++) { 2158 b->i[i] = b->i[i-1] + b->imax[i-1]; 2159 } 2160 2161 /* b->ilen will count nonzeros in each block row so far. */ 2162 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 2163 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2164 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 2165 2166 b->bs = bs; 2167 b->bs2 = bs2; 2168 b->mbs = mbs; 2169 b->nz = 0; 2170 b->maxnz = nz*bs2; 2171 b->sorted = 0; 2172 b->roworiented = 1; 2173 b->nonew = 0; 2174 b->diag = 0; 2175 b->solve_work = 0; 2176 b->mult_work = 0; 2177 b->spptr = 0; 2178 B->info.nz_unneeded = (double)b->maxnz; 2179 2180 *A = B; 2181 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 2182 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 2183 return 0; 2184 } 2185 2186 #undef __FUNC__ 2187 #define __FUNC__ "MatConvertSameType_SeqBAIJ" 2188 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 2189 { 2190 Mat C; 2191 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 2192 int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 2193 2194 if (a->i[mbs] != nz) SETERRQ(1,0,"Corrupt matrix"); 2195 2196 *B = 0; 2197 PetscHeaderCreate(C,_p_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 2198 PLogObjectCreate(C); 2199 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 2200 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 2201 C->destroy = MatDestroy_SeqBAIJ; 2202 C->view = MatView_SeqBAIJ; 2203 C->factor = A->factor; 2204 c->row = 0; 2205 c->col = 0; 2206 C->assembled = PETSC_TRUE; 2207 2208 c->m = C->m = a->m; 2209 c->n = C->n = a->n; 2210 C->M = a->m; 2211 C->N = a->n; 2212 2213 c->bs = a->bs; 2214 c->bs2 = a->bs2; 2215 c->mbs = a->mbs; 2216 c->nbs = a->nbs; 2217 2218 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 2219 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 2220 for ( i=0; i<mbs; i++ ) { 2221 c->imax[i] = a->imax[i]; 2222 c->ilen[i] = a->ilen[i]; 2223 } 2224 2225 /* allocate the matrix space */ 2226 c->singlemalloc = 1; 2227 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 2228 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 2229 c->j = (int *) (c->a + nz*bs2); 2230 c->i = c->j + nz; 2231 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 2232 if (mbs > 0) { 2233 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 2234 if (cpvalues == COPY_VALUES) { 2235 PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 2236 } 2237 } 2238 2239 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2240 c->sorted = a->sorted; 2241 c->roworiented = a->roworiented; 2242 c->nonew = a->nonew; 2243 2244 if (a->diag) { 2245 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 2246 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 2247 for ( i=0; i<mbs; i++ ) { 2248 c->diag[i] = a->diag[i]; 2249 } 2250 } 2251 else c->diag = 0; 2252 c->nz = a->nz; 2253 c->maxnz = a->maxnz; 2254 c->solve_work = 0; 2255 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 2256 c->mult_work = 0; 2257 *B = C; 2258 return 0; 2259 } 2260 2261 #undef __FUNC__ 2262 #define __FUNC__ "MatLoad_SeqBAIJ" 2263 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 2264 { 2265 Mat_SeqBAIJ *a; 2266 Mat B; 2267 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 2268 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2269 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 2270 int *masked, nmask,tmp,bs2,ishift; 2271 Scalar *aa; 2272 MPI_Comm comm = ((PetscObject) viewer)->comm; 2273 2274 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2275 bs2 = bs*bs; 2276 2277 MPI_Comm_size(comm,&size); 2278 if (size > 1) SETERRQ(1,0,"view must have one processor"); 2279 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2280 ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 2281 if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not Mat object"); 2282 M = header[1]; N = header[2]; nz = header[3]; 2283 2284 if (M != N) SETERRQ(1,0,"Can only do square matrices"); 2285 2286 /* 2287 This code adds extra rows to make sure the number of rows is 2288 divisible by the blocksize 2289 */ 2290 mbs = M/bs; 2291 extra_rows = bs - M + bs*(mbs); 2292 if (extra_rows == bs) extra_rows = 0; 2293 else mbs++; 2294 if (extra_rows) { 2295 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 2296 } 2297 2298 /* read in row lengths */ 2299 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 2300 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 2301 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 2302 2303 /* read in column indices */ 2304 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 2305 ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 2306 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 2307 2308 /* loop over row lengths determining block row lengths */ 2309 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 2310 PetscMemzero(browlengths,mbs*sizeof(int)); 2311 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 2312 PetscMemzero(mask,mbs*sizeof(int)); 2313 masked = mask + mbs; 2314 rowcount = 0; nzcount = 0; 2315 for ( i=0; i<mbs; i++ ) { 2316 nmask = 0; 2317 for ( j=0; j<bs; j++ ) { 2318 kmax = rowlengths[rowcount]; 2319 for ( k=0; k<kmax; k++ ) { 2320 tmp = jj[nzcount++]/bs; 2321 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2322 } 2323 rowcount++; 2324 } 2325 browlengths[i] += nmask; 2326 /* zero out the mask elements we set */ 2327 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2328 } 2329 2330 /* create our matrix */ 2331 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 2332 CHKERRQ(ierr); 2333 B = *A; 2334 a = (Mat_SeqBAIJ *) B->data; 2335 2336 /* set matrix "i" values */ 2337 a->i[0] = 0; 2338 for ( i=1; i<= mbs; i++ ) { 2339 a->i[i] = a->i[i-1] + browlengths[i-1]; 2340 a->ilen[i-1] = browlengths[i-1]; 2341 } 2342 a->nz = 0; 2343 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 2344 2345 /* read in nonzero values */ 2346 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 2347 ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 2348 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 2349 2350 /* set "a" and "j" values into matrix */ 2351 nzcount = 0; jcount = 0; 2352 for ( i=0; i<mbs; i++ ) { 2353 nzcountb = nzcount; 2354 nmask = 0; 2355 for ( j=0; j<bs; j++ ) { 2356 kmax = rowlengths[i*bs+j]; 2357 for ( k=0; k<kmax; k++ ) { 2358 tmp = jj[nzcount++]/bs; 2359 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2360 } 2361 rowcount++; 2362 } 2363 /* sort the masked values */ 2364 PetscSortInt(nmask,masked); 2365 2366 /* set "j" values into matrix */ 2367 maskcount = 1; 2368 for ( j=0; j<nmask; j++ ) { 2369 a->j[jcount++] = masked[j]; 2370 mask[masked[j]] = maskcount++; 2371 } 2372 /* set "a" values into matrix */ 2373 ishift = bs2*a->i[i]; 2374 for ( j=0; j<bs; j++ ) { 2375 kmax = rowlengths[i*bs+j]; 2376 for ( k=0; k<kmax; k++ ) { 2377 tmp = jj[nzcountb]/bs ; 2378 block = mask[tmp] - 1; 2379 point = jj[nzcountb] - bs*tmp; 2380 idx = ishift + bs2*block + j + bs*point; 2381 a->a[idx] = aa[nzcountb++]; 2382 } 2383 } 2384 /* zero out the mask elements we set */ 2385 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2386 } 2387 if (jcount != a->nz) SETERRQ(1,0,"Error bad binary matrix"); 2388 2389 PetscFree(rowlengths); 2390 PetscFree(browlengths); 2391 PetscFree(aa); 2392 PetscFree(jj); 2393 PetscFree(mask); 2394 2395 B->assembled = PETSC_TRUE; 2396 2397 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 2398 if (flg) { 2399 Viewer tviewer; 2400 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2401 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO,0);CHKERRQ(ierr); 2402 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2403 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2404 } 2405 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 2406 if (flg) { 2407 Viewer tviewer; 2408 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2409 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO_LONG,0);CHKERRQ(ierr); 2410 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2411 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2412 } 2413 ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 2414 if (flg) { 2415 Viewer tviewer; 2416 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2417 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2418 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2419 } 2420 ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 2421 if (flg) { 2422 Viewer tviewer; 2423 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2424 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_MATLAB,"M");CHKERRQ(ierr); 2425 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2426 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2427 } 2428 ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 2429 if (flg) { 2430 Viewer tviewer; 2431 ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 2432 ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 2433 ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 2434 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2435 } 2436 return 0; 2437 } 2438 2439 2440 2441