1 #ifndef lint 2 static char vcid[] = "$Id: baij.c,v 1.87 1997/02/03 20:15:40 bsmith Exp balay $"; 3 #endif 4 5 /* 6 Defines the basic matrix operations for the BAIJ (compressed row) 7 matrix storage format. 8 */ 9 #include "src/mat/impls/baij/seq/baij.h" 10 #include "src/vec/vecimpl.h" 11 #include "src/inline/spops.h" 12 #include "petsc.h" 13 14 15 /* 16 Adds diagonal pointers to sparse matrix structure. 17 */ 18 19 #undef __FUNC__ 20 #define __FUNC__ "MatMarkDiag_SeqBAIJ" 21 int MatMarkDiag_SeqBAIJ(Mat A) 22 { 23 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 24 int i,j, *diag, m = a->mbs; 25 26 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 27 PLogObjectMemory(A,(m+1)*sizeof(int)); 28 for ( i=0; i<m; i++ ) { 29 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 30 if (a->j[j] == i) { 31 diag[i] = j; 32 break; 33 } 34 } 35 } 36 a->diag = diag; 37 return 0; 38 } 39 40 #include "draw.h" 41 #include "pinclude/pviewer.h" 42 #include "sys.h" 43 44 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 45 46 #undef __FUNC__ 47 #define __FUNC__ "MatGetRowIJ_SeqBAIJ" 48 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 49 PetscTruth *done) 50 { 51 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 52 int ierr, n = a->mbs,i; 53 54 *nn = n; 55 if (!ia) return 0; 56 if (symmetric) { 57 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 58 } else if (oshift == 1) { 59 /* temporarily add 1 to i and j indices */ 60 int nz = a->i[n] + 1; 61 for ( i=0; i<nz; i++ ) a->j[i]++; 62 for ( i=0; i<n+1; i++ ) a->i[i]++; 63 *ia = a->i; *ja = a->j; 64 } else { 65 *ia = a->i; *ja = a->j; 66 } 67 68 return 0; 69 } 70 71 #undef __FUNC__ 72 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ" 73 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 74 PetscTruth *done) 75 { 76 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 77 int i,n = a->mbs; 78 79 if (!ia) return 0; 80 if (symmetric) { 81 PetscFree(*ia); 82 PetscFree(*ja); 83 } else if (oshift == 1) { 84 int nz = a->i[n]; 85 for ( i=0; i<nz; i++ ) a->j[i]--; 86 for ( i=0; i<n+1; i++ ) a->i[i]--; 87 } 88 return 0; 89 } 90 91 92 #undef __FUNC__ 93 #define __FUNC__ "MatView_SeqBAIJ_Binary" 94 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 95 { 96 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 97 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 98 Scalar *aa; 99 100 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 101 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 102 col_lens[0] = MAT_COOKIE; 103 104 col_lens[1] = a->m; 105 col_lens[2] = a->n; 106 col_lens[3] = a->nz*bs2; 107 108 /* store lengths of each row and write (including header) to file */ 109 count = 0; 110 for ( i=0; i<a->mbs; i++ ) { 111 for ( j=0; j<bs; j++ ) { 112 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 113 } 114 } 115 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 116 PetscFree(col_lens); 117 118 /* store column indices (zero start index) */ 119 jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj); 120 count = 0; 121 for ( i=0; i<a->mbs; i++ ) { 122 for ( j=0; j<bs; j++ ) { 123 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 124 for ( l=0; l<bs; l++ ) { 125 jj[count++] = bs*a->j[k] + l; 126 } 127 } 128 } 129 } 130 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr); 131 PetscFree(jj); 132 133 /* store nonzero values */ 134 aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa); 135 count = 0; 136 for ( i=0; i<a->mbs; i++ ) { 137 for ( j=0; j<bs; j++ ) { 138 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 139 for ( l=0; l<bs; l++ ) { 140 aa[count++] = a->a[bs2*k + l*bs + j]; 141 } 142 } 143 } 144 } 145 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 146 PetscFree(aa); 147 return 0; 148 } 149 150 #undef __FUNC__ 151 #define __FUNC__ "MatView_SeqBAIJ_ASCII" 152 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 153 { 154 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 155 int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 156 FILE *fd; 157 char *outputname; 158 159 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 160 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 161 ierr = ViewerGetFormat(viewer,&format); 162 if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) { 163 fprintf(fd," block size is %d\n",bs); 164 } 165 else if (format == VIEWER_FORMAT_ASCII_MATLAB) { 166 SETERRQ(1,0,"Matlab format not supported"); 167 } 168 else if (format == VIEWER_FORMAT_ASCII_COMMON) { 169 for ( i=0; i<a->mbs; i++ ) { 170 for ( j=0; j<bs; j++ ) { 171 fprintf(fd,"row %d:",i*bs+j); 172 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 173 for ( l=0; l<bs; l++ ) { 174 #if defined(PETSC_COMPLEX) 175 if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 176 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 177 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 178 else if (real(a->a[bs2*k + l*bs + j]) != 0.0) 179 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 180 #else 181 if (a->a[bs2*k + l*bs + j] != 0.0) 182 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 183 #endif 184 } 185 } 186 fprintf(fd,"\n"); 187 } 188 } 189 } 190 else { 191 for ( i=0; i<a->mbs; i++ ) { 192 for ( j=0; j<bs; j++ ) { 193 fprintf(fd,"row %d:",i*bs+j); 194 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 195 for ( l=0; l<bs; l++ ) { 196 #if defined(PETSC_COMPLEX) 197 if (imag(a->a[bs2*k + l*bs + j]) != 0.0) { 198 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 199 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 200 } 201 else { 202 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 203 } 204 #else 205 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 206 #endif 207 } 208 } 209 fprintf(fd,"\n"); 210 } 211 } 212 } 213 fflush(fd); 214 return 0; 215 } 216 217 #undef __FUNC__ 218 #define __FUNC__ "MatView_SeqBAIJ_Draw" 219 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer) 220 { 221 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 222 int row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2; 223 double xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r; 224 Scalar *aa; 225 Draw draw; 226 DrawButton button; 227 PetscTruth isnull; 228 229 ViewerDrawGetDraw(viewer,&draw); 230 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 231 232 xr = a->n; yr = a->m; h = yr/10.0; w = xr/10.0; 233 xr += w; yr += h; xl = -w; yl = -h; 234 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 235 /* loop over matrix elements drawing boxes */ 236 color = DRAW_BLUE; 237 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 238 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 239 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 240 x_l = a->j[j]*bs; x_r = x_l + 1.0; 241 aa = a->a + j*bs2; 242 for ( k=0; k<bs; k++ ) { 243 for ( l=0; l<bs; l++ ) { 244 if (PetscReal(*aa++) >= 0.) continue; 245 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 246 } 247 } 248 } 249 } 250 color = DRAW_CYAN; 251 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 252 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 253 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 254 x_l = a->j[j]*bs; x_r = x_l + 1.0; 255 aa = a->a + j*bs2; 256 for ( k=0; k<bs; k++ ) { 257 for ( l=0; l<bs; l++ ) { 258 if (PetscReal(*aa++) != 0.) continue; 259 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 260 } 261 } 262 } 263 } 264 265 color = DRAW_RED; 266 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 267 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 268 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 269 x_l = a->j[j]*bs; x_r = x_l + 1.0; 270 aa = a->a + j*bs2; 271 for ( k=0; k<bs; k++ ) { 272 for ( l=0; l<bs; l++ ) { 273 if (PetscReal(*aa++) <= 0.) continue; 274 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 275 } 276 } 277 } 278 } 279 280 DrawFlush(draw); 281 DrawGetPause(draw,&pause); 282 if (pause >= 0) { PetscSleep(pause); return 0;} 283 284 /* allow the matrix to zoom or shrink */ 285 ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 286 while (button != BUTTON_RIGHT) { 287 DrawClear(draw); 288 if (button == BUTTON_LEFT) scale = .5; 289 else if (button == BUTTON_CENTER) scale = 2.; 290 xl = scale*(xl + w - xc) + xc - w*scale; 291 xr = scale*(xr - w - xc) + xc + w*scale; 292 yl = scale*(yl + h - yc) + yc - h*scale; 293 yr = scale*(yr - h - yc) + yc + h*scale; 294 w *= scale; h *= scale; 295 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 296 color = DRAW_BLUE; 297 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 298 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 299 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 300 x_l = a->j[j]*bs; x_r = x_l + 1.0; 301 aa = a->a + j*bs2; 302 for ( k=0; k<bs; k++ ) { 303 for ( l=0; l<bs; l++ ) { 304 if (PetscReal(*aa++) >= 0.) continue; 305 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 306 } 307 } 308 } 309 } 310 color = DRAW_CYAN; 311 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 312 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 313 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 314 x_l = a->j[j]*bs; x_r = x_l + 1.0; 315 aa = a->a + j*bs2; 316 for ( k=0; k<bs; k++ ) { 317 for ( l=0; l<bs; l++ ) { 318 if (PetscReal(*aa++) != 0.) continue; 319 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 320 } 321 } 322 } 323 } 324 325 color = DRAW_RED; 326 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 327 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 328 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 329 x_l = a->j[j]*bs; x_r = x_l + 1.0; 330 aa = a->a + j*bs2; 331 for ( k=0; k<bs; k++ ) { 332 for ( l=0; l<bs; l++ ) { 333 if (PetscReal(*aa++) <= 0.) continue; 334 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 335 } 336 } 337 } 338 } 339 ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 340 } 341 return 0; 342 } 343 344 #undef __FUNC__ 345 #define __FUNC__ "MatView_SeqBAIJ" 346 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 347 { 348 Mat A = (Mat) obj; 349 ViewerType vtype; 350 int ierr; 351 352 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 353 if (vtype == MATLAB_VIEWER) { 354 SETERRQ(1,0,"Matlab viewer not supported"); 355 } 356 else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 357 return MatView_SeqBAIJ_ASCII(A,viewer); 358 } 359 else if (vtype == BINARY_FILE_VIEWER) { 360 return MatView_SeqBAIJ_Binary(A,viewer); 361 } 362 else if (vtype == DRAW_VIEWER) { 363 return MatView_SeqBAIJ_Draw(A,viewer); 364 } 365 return 0; 366 } 367 368 #define CHUNKSIZE 10 369 370 #undef __FUNC__ 371 #define __FUNC__ "MatSetValues_SeqBAIJ" 372 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 373 { 374 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 375 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 376 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 377 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 378 int ridx,cidx,bs2=a->bs2; 379 Scalar *ap,value,*aa=a->a,*bap; 380 381 for ( k=0; k<m; k++ ) { /* loop over added rows */ 382 row = im[k]; brow = row/bs; 383 #if defined(PETSC_BOPT_g) 384 if (row < 0) SETERRQ(1,0,"Negative row"); 385 if (row >= a->m) SETERRQ(1,0,"Row too large"); 386 #endif 387 rp = aj + ai[brow]; 388 ap = aa + bs2*ai[brow]; 389 rmax = imax[brow]; 390 nrow = ailen[brow]; 391 low = 0; 392 for ( l=0; l<n; l++ ) { /* loop over added columns */ 393 #if defined(PETSC_BOPT_g) 394 if (in[l] < 0) SETERRQ(1,0,"Negative column"); 395 if (in[l] >= a->n) SETERRQ(1,0,"Column too large"); 396 #endif 397 col = in[l]; bcol = col/bs; 398 ridx = row % bs; cidx = col % bs; 399 if (roworiented) { 400 value = *v++; 401 } 402 else { 403 value = v[k + l*m]; 404 } 405 if (!sorted) low = 0; high = nrow; 406 while (high-low > 7) { 407 t = (low+high)/2; 408 if (rp[t] > bcol) high = t; 409 else low = t; 410 } 411 for ( i=low; i<high; i++ ) { 412 if (rp[i] > bcol) break; 413 if (rp[i] == bcol) { 414 bap = ap + bs2*i + bs*cidx + ridx; 415 if (is == ADD_VALUES) *bap += value; 416 else *bap = value; 417 goto noinsert; 418 } 419 } 420 if (nonew) goto noinsert; 421 if (nrow >= rmax) { 422 /* there is no extra room in row, therefore enlarge */ 423 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 424 Scalar *new_a; 425 426 /* malloc new storage space */ 427 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 428 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 429 new_j = (int *) (new_a + bs2*new_nz); 430 new_i = new_j + new_nz; 431 432 /* copy over old data into new slots */ 433 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 434 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 435 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); 436 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 437 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, 438 len*sizeof(int)); 439 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); 440 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 441 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), 442 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); 443 /* free up old matrix storage */ 444 PetscFree(a->a); 445 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 446 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 447 a->singlemalloc = 1; 448 449 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 450 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 451 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 452 a->maxnz += bs2*CHUNKSIZE; 453 a->reallocs++; 454 a->nz++; 455 } 456 N = nrow++ - 1; 457 /* shift up all the later entries in this row */ 458 for ( ii=N; ii>=i; ii-- ) { 459 rp[ii+1] = rp[ii]; 460 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 461 } 462 if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 463 rp[i] = bcol; 464 ap[bs2*i + bs*cidx + ridx] = value; 465 noinsert:; 466 low = i; 467 } 468 ailen[brow] = nrow; 469 } 470 return 0; 471 } 472 473 #undef __FUNC__ 474 #define __FUNC__ "MatSetValues_SeqBAIJ" 475 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 476 { 477 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 478 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 479 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 480 int *aj=a->j,nonew=a->nonew,bs=a->bs; 481 int ridx,cidx,bs2=a->bs2; 482 Scalar *ap,value,*aa=a->a,*bap; 483 484 for ( k=0; k<m; k++ ) { /* loop over added rows */ 485 row = im[k]; 486 #if defined(PETSC_BOPT_g) 487 if (row < 0) SETERRQ(1,0,"Negative row"); 488 if (row >= a->m) SETERRQ(1,0,"Row too large"); 489 #endif 490 rp = aj + ai[row]; 491 ap = aa + bs2*ai[row]; 492 rmax = imax[row]; 493 nrow = ailen[row]; 494 low = 0; 495 for ( l=0; l<n; l++ ) { /* loop over added columns */ 496 #if defined(PETSC_BOPT_g) 497 if (in[l] < 0) SETERRQ(1,0,"Negative column"); 498 if (in[l] >= a->n) SETERRQ(1,0,"Column too large"); 499 #endif 500 col = in[l]; 501 ridx = row % bs; cidx = col % bs; 502 if (roworiented) { 503 value = *v++; 504 } 505 else { 506 value = v[k + l*m]; 507 } 508 if (!sorted) low = 0; high = nrow; 509 while (high-low > 7) { 510 t = (low+high)/2; 511 if (rp[t] > col) high = t; 512 else low = t; 513 } 514 for ( i=low; i<high; i++ ) { 515 if (rp[i] > col) break; 516 if (rp[i] == col) { 517 bap = ap + bs2*i + bs*cidx + ridx; 518 if (is == ADD_VALUES) *bap += value; 519 else *bap = value; 520 goto noinsert; 521 } 522 } 523 if (nonew) goto noinsert; 524 if (nrow >= rmax) { 525 /* there is no extra room in row, therefore enlarge */ 526 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 527 Scalar *new_a; 528 529 /* malloc new storage space */ 530 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 531 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 532 new_j = (int *) (new_a + bs2*new_nz); 533 new_i = new_j + new_nz; 534 535 /* copy over old data into new slots */ 536 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} 537 for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 538 PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int)); 539 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 540 PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow, 541 len*sizeof(int)); 542 PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(Scalar)); 543 PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 544 PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE), 545 aa+bs2*(ai[row]+nrow),bs2*len*sizeof(Scalar)); 546 /* free up old matrix storage */ 547 PetscFree(a->a); 548 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 549 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 550 a->singlemalloc = 1; 551 552 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 553 rmax = imax[row] = imax[row] + CHUNKSIZE; 554 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 555 a->maxnz += bs2*CHUNKSIZE; 556 a->reallocs++; 557 a->nz++; 558 } 559 N = nrow++ - 1; 560 /* shift up all the later entries in this row */ 561 for ( ii=N; ii>=i; ii-- ) { 562 rp[ii+1] = rp[ii]; 563 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 564 } 565 if (N >= i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 566 rp[i] = col; 567 ap[bs2*i + bs*cidx + ridx] = value; 568 noinsert:; 569 low = i; 570 } 571 ailen[row] = nrow; 572 } 573 return 0; 574 } 575 576 #undef __FUNC__ 577 #define __FUNC__ "MatGetSize_SeqBAIJ" 578 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 579 { 580 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 581 *m = a->m; *n = a->n; 582 return 0; 583 } 584 585 #undef __FUNC__ 586 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ" 587 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 588 { 589 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 590 *m = 0; *n = a->m; 591 return 0; 592 } 593 594 #undef __FUNC__ 595 #define __FUNC__ "MatGetRow_SeqBAIJ" 596 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 597 { 598 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 599 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 600 Scalar *aa,*v_i,*aa_i; 601 602 bs = a->bs; 603 ai = a->i; 604 aj = a->j; 605 aa = a->a; 606 bs2 = a->bs2; 607 608 if (row < 0 || row >= a->m) SETERRQ(1,0,"Row out of range"); 609 610 bn = row/bs; /* Block number */ 611 bp = row % bs; /* Block Position */ 612 M = ai[bn+1] - ai[bn]; 613 *nz = bs*M; 614 615 if (v) { 616 *v = 0; 617 if (*nz) { 618 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 619 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 620 v_i = *v + i*bs; 621 aa_i = aa + bs2*(ai[bn] + i); 622 for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 623 } 624 } 625 } 626 627 if (idx) { 628 *idx = 0; 629 if (*nz) { 630 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 631 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 632 idx_i = *idx + i*bs; 633 itmp = bs*aj[ai[bn] + i]; 634 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 635 } 636 } 637 } 638 return 0; 639 } 640 641 #undef __FUNC__ 642 #define __FUNC__ "MatRestoreRow_SeqBAIJ" 643 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 644 { 645 if (idx) {if (*idx) PetscFree(*idx);} 646 if (v) {if (*v) PetscFree(*v);} 647 return 0; 648 } 649 650 #undef __FUNC__ 651 #define __FUNC__ "MatTranspose_SeqBAIJ" 652 static int MatTranspose_SeqBAIJ(Mat A,Mat *B) 653 { 654 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 655 Mat C; 656 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 657 int *rows,*cols,bs2=a->bs2; 658 Scalar *array=a->a; 659 660 if (B==PETSC_NULL && mbs!=nbs) 661 SETERRQ(1,0,"Square matrix only for in-place"); 662 col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 663 PetscMemzero(col,(1+nbs)*sizeof(int)); 664 665 for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 666 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 667 PetscFree(col); 668 rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 669 cols = rows + bs; 670 for ( i=0; i<mbs; i++ ) { 671 cols[0] = i*bs; 672 for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 673 len = ai[i+1] - ai[i]; 674 for ( j=0; j<len; j++ ) { 675 rows[0] = (*aj++)*bs; 676 for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 677 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 678 array += bs2; 679 } 680 } 681 PetscFree(rows); 682 683 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 684 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 685 686 if (B != PETSC_NULL) { 687 *B = C; 688 } else { 689 /* This isn't really an in-place transpose */ 690 PetscFree(a->a); 691 if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 692 if (a->diag) PetscFree(a->diag); 693 if (a->ilen) PetscFree(a->ilen); 694 if (a->imax) PetscFree(a->imax); 695 if (a->solve_work) PetscFree(a->solve_work); 696 PetscFree(a); 697 PetscMemcpy(A,C,sizeof(struct _Mat)); 698 PetscHeaderDestroy(C); 699 } 700 return 0; 701 } 702 703 704 #undef __FUNC__ 705 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ" 706 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 707 { 708 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 709 int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 710 int m = a->m,*ip, N, *ailen = a->ilen; 711 int mbs = a->mbs, bs2 = a->bs2,rmax; 712 Scalar *aa = a->a, *ap; 713 714 if (mode == MAT_FLUSH_ASSEMBLY) return 0; 715 716 rmax = ailen[0]; 717 for ( i=1; i<mbs; i++ ) { 718 /* move each row back by the amount of empty slots (fshift) before it*/ 719 fshift += imax[i-1] - ailen[i-1]; 720 rmax = PetscMax(rmax,ailen[i]); 721 if (fshift) { 722 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 723 N = ailen[i]; 724 for ( j=0; j<N; j++ ) { 725 ip[j-fshift] = ip[j]; 726 PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 727 } 728 } 729 ai[i] = ai[i-1] + ailen[i-1]; 730 } 731 if (mbs) { 732 fshift += imax[mbs-1] - ailen[mbs-1]; 733 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 734 } 735 /* reset ilen and imax for each row */ 736 for ( i=0; i<mbs; i++ ) { 737 ailen[i] = imax[i] = ai[i+1] - ai[i]; 738 } 739 a->nz = ai[mbs]; 740 741 /* diagonals may have moved, so kill the diagonal pointers */ 742 if (fshift && a->diag) { 743 PetscFree(a->diag); 744 PLogObjectMemory(A,-(m+1)*sizeof(int)); 745 a->diag = 0; 746 } 747 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 748 m,a->n,a->bs,fshift*bs2,a->nz*bs2); 749 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 750 a->reallocs); 751 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 752 a->reallocs = 0; 753 A->info.nz_unneeded = (double)fshift*bs2; 754 755 return 0; 756 } 757 758 #undef __FUNC__ 759 #define __FUNC__ "MatZeroEntries_SeqBAIJ" 760 static int MatZeroEntries_SeqBAIJ(Mat A) 761 { 762 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 763 PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar)); 764 return 0; 765 } 766 767 #undef __FUNC__ 768 #define __FUNC__ "MatDestroy_SeqBAIJ" 769 int MatDestroy_SeqBAIJ(PetscObject obj) 770 { 771 Mat A = (Mat) obj; 772 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 773 int ierr; 774 775 #if defined(PETSC_LOG) 776 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 777 #endif 778 PetscFree(a->a); 779 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 780 if (a->diag) PetscFree(a->diag); 781 if (a->ilen) PetscFree(a->ilen); 782 if (a->imax) PetscFree(a->imax); 783 if (a->solve_work) PetscFree(a->solve_work); 784 if (a->mult_work) PetscFree(a->mult_work); 785 PetscFree(a); 786 if (A->mapping) { 787 ierr = ISLocalToGlobalMappingDestroy(A->mapping); CHKERRQ(ierr); 788 } 789 PLogObjectDestroy(A); 790 PetscHeaderDestroy(A); 791 return 0; 792 } 793 794 #undef __FUNC__ 795 #define __FUNC__ "MatSetOption_SeqBAIJ" 796 static int MatSetOption_SeqBAIJ(Mat A,MatOption op) 797 { 798 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 799 if (op == MAT_ROW_ORIENTED) a->roworiented = 1; 800 else if (op == MAT_COLUMN_ORIENTED) a->roworiented = 0; 801 else if (op == MAT_COLUMNS_SORTED) a->sorted = 1; 802 else if (op == MAT_COLUMNS_UNSORTED) a->sorted = 0; 803 else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 804 else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 805 else if (op == MAT_ROWS_SORTED || 806 op == MAT_ROWS_UNSORTED || 807 op == MAT_SYMMETRIC || 808 op == MAT_STRUCTURALLY_SYMMETRIC || 809 op == MAT_YES_NEW_DIAGONALS || 810 op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) 811 PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 812 else if (op == MAT_NO_NEW_DIAGONALS) 813 {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");} 814 else 815 {SETERRQ(PETSC_ERR_SUP,0,"unknown option");} 816 return 0; 817 } 818 819 820 /* -------------------------------------------------------*/ 821 /* Should check that shapes of vectors and matrices match */ 822 /* -------------------------------------------------------*/ 823 #include "pinclude/plapack.h" 824 825 #undef __FUNC__ 826 #define __FUNC__ "MatMult_SeqBAIJ_1" 827 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz) 828 { 829 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 830 register Scalar *x,*z,*v,sum; 831 int mbs=a->mbs,i,*idx,*ii,n; 832 833 VecGetArray_Fast(xx,x); 834 VecGetArray_Fast(zz,z); 835 836 idx = a->j; 837 v = a->a; 838 ii = a->i; 839 840 for ( i=0; i<mbs; i++ ) { 841 n = ii[1] - ii[0]; ii++; 842 sum = 0.0; 843 while (n--) sum += *v++ * x[*idx++]; 844 z[i] = sum; 845 } 846 VecRestoreArray_Fast(xx,x); 847 VecRestoreArray_Fast(zz,z); 848 PLogFlops(2*a->nz - a->m); 849 return 0; 850 } 851 852 #undef __FUNC__ 853 #define __FUNC__ "MatMult_SeqBAIJ_2" 854 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz) 855 { 856 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 857 register Scalar *x,*z,*v,*xb,sum1,sum2; 858 register Scalar x1,x2; 859 int mbs=a->mbs,i,*idx,*ii; 860 int j,n; 861 862 VecGetArray_Fast(xx,x); 863 VecGetArray_Fast(zz,z); 864 865 idx = a->j; 866 v = a->a; 867 ii = a->i; 868 869 for ( i=0; i<mbs; i++ ) { 870 n = ii[1] - ii[0]; ii++; 871 sum1 = 0.0; sum2 = 0.0; 872 for ( j=0; j<n; j++ ) { 873 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 874 sum1 += v[0]*x1 + v[2]*x2; 875 sum2 += v[1]*x1 + v[3]*x2; 876 v += 4; 877 } 878 z[0] = sum1; z[1] = sum2; 879 z += 2; 880 } 881 VecRestoreArray_Fast(xx,x); 882 VecRestoreArray_Fast(zz,z); 883 PLogFlops(4*a->nz - a->m); 884 return 0; 885 } 886 887 #undef __FUNC__ 888 #define __FUNC__ "MatMult_SeqBAIJ_3" 889 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz) 890 { 891 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 892 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 893 int mbs=a->mbs,i,*idx,*ii,j,n; 894 895 VecGetArray_Fast(xx,x); 896 VecGetArray_Fast(zz,z); 897 898 idx = a->j; 899 v = a->a; 900 ii = a->i; 901 902 for ( i=0; i<mbs; i++ ) { 903 n = ii[1] - ii[0]; ii++; 904 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 905 for ( j=0; j<n; j++ ) { 906 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 907 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 908 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 909 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 910 v += 9; 911 } 912 z[0] = sum1; z[1] = sum2; z[2] = sum3; 913 z += 3; 914 } 915 VecRestoreArray_Fast(xx,x); 916 VecRestoreArray_Fast(zz,z); 917 PLogFlops(18*a->nz - a->m); 918 return 0; 919 } 920 921 #undef __FUNC__ 922 #define __FUNC__ "MatMult_SeqBAIJ_4" 923 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz) 924 { 925 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 926 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4; 927 register Scalar x1,x2,x3,x4; 928 int mbs=a->mbs,i,*idx,*ii; 929 int j,n; 930 931 VecGetArray_Fast(xx,x); 932 VecGetArray_Fast(zz,z); 933 934 idx = a->j; 935 v = a->a; 936 ii = a->i; 937 938 for ( i=0; i<mbs; i++ ) { 939 n = ii[1] - ii[0]; ii++; 940 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 941 for ( j=0; j<n; j++ ) { 942 xb = x + 4*(*idx++); 943 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 944 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 945 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 946 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 947 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 948 v += 16; 949 } 950 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 951 z += 4; 952 } 953 VecRestoreArray_Fast(xx,x); 954 VecRestoreArray_Fast(zz,z); 955 PLogFlops(32*a->nz - a->m); 956 return 0; 957 } 958 959 #undef __FUNC__ 960 #define __FUNC__ "MatMult_SeqBAIJ_5" 961 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz) 962 { 963 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 964 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 965 register Scalar x1,x2,x3,x4,x5; 966 int mbs=a->mbs,i,*idx,*ii,j,n; 967 968 VecGetArray_Fast(xx,x); 969 VecGetArray_Fast(zz,z); 970 971 idx = a->j; 972 v = a->a; 973 ii = a->i; 974 975 for ( i=0; i<mbs; i++ ) { 976 n = ii[1] - ii[0]; ii++; 977 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 978 for ( j=0; j<n; j++ ) { 979 xb = x + 5*(*idx++); 980 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 981 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 982 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 983 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 984 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 985 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 986 v += 25; 987 } 988 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 989 z += 5; 990 } 991 VecRestoreArray_Fast(xx,x); 992 VecRestoreArray_Fast(zz,z); 993 PLogFlops(50*a->nz - a->m); 994 return 0; 995 } 996 997 #undef __FUNC__ 998 #define __FUNC__ "MatMult_SeqBAIJ_7" 999 static int MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz) 1000 { 1001 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1002 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 1003 register Scalar x1,x2,x3,x4,x5,x6,x7; 1004 int mbs=a->mbs,i,*idx,*ii,j,n; 1005 1006 VecGetArray_Fast(xx,x); 1007 VecGetArray_Fast(zz,z); 1008 1009 idx = a->j; 1010 v = a->a; 1011 ii = a->i; 1012 1013 for ( i=0; i<mbs; i++ ) { 1014 n = ii[1] - ii[0]; ii++; 1015 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0; 1016 for ( j=0; j<n; j++ ) { 1017 xb = x + 7*(*idx++); 1018 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 1019 sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 1020 sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 1021 sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 1022 sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 1023 sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 1024 sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 1025 sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 1026 v += 49; 1027 } 1028 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 1029 z += 7; 1030 } 1031 1032 VecRestoreArray_Fast(xx,x); 1033 VecRestoreArray_Fast(zz,z); 1034 PLogFlops(98*a->nz - a->m); 1035 return 0; 1036 } 1037 1038 #undef __FUNC__ 1039 #define __FUNC__ "MatMult_SeqBAIJ_N" 1040 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz) 1041 { 1042 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1043 register Scalar *x,*z,*v,*xb; 1044 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2; 1045 int _One = 1,ncols,k; 1046 Scalar _DOne = 1.0,*work,*workt,_DZero = 0.0; 1047 1048 VecGetArray_Fast(xx,x); 1049 VecGetArray_Fast(zz,z); 1050 1051 idx = a->j; 1052 v = a->a; 1053 ii = a->i; 1054 1055 1056 if (!a->mult_work) { 1057 k = PetscMax(a->m,a->n); 1058 a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work); 1059 } 1060 work = a->mult_work; 1061 for ( i=0; i<mbs; i++ ) { 1062 n = ii[1] - ii[0]; ii++; 1063 ncols = n*bs; 1064 workt = work; 1065 for ( j=0; j<n; j++ ) { 1066 xb = x + bs*(*idx++); 1067 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1068 workt += bs; 1069 } 1070 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); 1071 v += n*bs2; 1072 z += bs; 1073 } 1074 VecRestoreArray_Fast(xx,x); 1075 VecRestoreArray_Fast(zz,z); 1076 PLogFlops(2*a->nz*bs2 - a->m); 1077 return 0; 1078 } 1079 1080 #undef __FUNC__ 1081 #define __FUNC__ "MatMultAdd_SeqBAIJ_1" 1082 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 1083 { 1084 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1085 register Scalar *x,*y,*z,*v,sum; 1086 int mbs=a->mbs,i,*idx,*ii,n; 1087 1088 VecGetArray_Fast(xx,x); 1089 VecGetArray_Fast(yy,y); 1090 VecGetArray_Fast(zz,z); 1091 1092 idx = a->j; 1093 v = a->a; 1094 ii = a->i; 1095 1096 for ( i=0; i<mbs; i++ ) { 1097 n = ii[1] - ii[0]; ii++; 1098 sum = y[i]; 1099 while (n--) sum += *v++ * x[*idx++]; 1100 z[i] = sum; 1101 } 1102 VecRestoreArray_Fast(xx,x); 1103 VecRestoreArray_Fast(yy,y); 1104 VecRestoreArray_Fast(zz,z); 1105 PLogFlops(2*a->nz); 1106 return 0; 1107 } 1108 1109 #undef __FUNC__ 1110 #define __FUNC__ "MatMultAdd_SeqBAIJ_2" 1111 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 1112 { 1113 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1114 register Scalar *x,*y,*z,*v,*xb,sum1,sum2; 1115 register Scalar x1,x2; 1116 int mbs=a->mbs,i,*idx,*ii; 1117 int j,n; 1118 1119 VecGetArray_Fast(xx,x); 1120 VecGetArray_Fast(yy,y); 1121 VecGetArray_Fast(zz,z); 1122 1123 idx = a->j; 1124 v = a->a; 1125 ii = a->i; 1126 1127 for ( i=0; i<mbs; i++ ) { 1128 n = ii[1] - ii[0]; ii++; 1129 sum1 = y[0]; sum2 = y[1]; 1130 for ( j=0; j<n; j++ ) { 1131 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 1132 sum1 += v[0]*x1 + v[2]*x2; 1133 sum2 += v[1]*x1 + v[3]*x2; 1134 v += 4; 1135 } 1136 z[0] = sum1; z[1] = sum2; 1137 z += 2; y += 2; 1138 } 1139 VecRestoreArray_Fast(xx,x); 1140 VecRestoreArray_Fast(yy,y); 1141 VecRestoreArray_Fast(zz,z); 1142 PLogFlops(4*a->nz); 1143 return 0; 1144 } 1145 1146 #undef __FUNC__ 1147 #define __FUNC__ "MatMultAdd_SeqBAIJ_3" 1148 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 1149 { 1150 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1151 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 1152 int mbs=a->mbs,i,*idx,*ii,j,n; 1153 1154 VecGetArray_Fast(xx,x); 1155 VecGetArray_Fast(yy,y); 1156 VecGetArray_Fast(zz,z); 1157 1158 idx = a->j; 1159 v = a->a; 1160 ii = a->i; 1161 1162 for ( i=0; i<mbs; i++ ) { 1163 n = ii[1] - ii[0]; ii++; 1164 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; 1165 for ( j=0; j<n; j++ ) { 1166 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1167 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 1168 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 1169 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 1170 v += 9; 1171 } 1172 z[0] = sum1; z[1] = sum2; z[2] = sum3; 1173 z += 3; y += 3; 1174 } 1175 VecRestoreArray_Fast(xx,x); 1176 VecRestoreArray_Fast(yy,y); 1177 VecRestoreArray_Fast(zz,z); 1178 PLogFlops(18*a->nz); 1179 return 0; 1180 } 1181 1182 #undef __FUNC__ 1183 #define __FUNC__ "MatMultAdd_SeqBAIJ_4" 1184 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 1185 { 1186 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1187 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4; 1188 register Scalar x1,x2,x3,x4; 1189 int mbs=a->mbs,i,*idx,*ii; 1190 int j,n; 1191 1192 VecGetArray_Fast(xx,x); 1193 VecGetArray_Fast(yy,y); 1194 VecGetArray_Fast(zz,z); 1195 1196 idx = a->j; 1197 v = a->a; 1198 ii = a->i; 1199 1200 for ( i=0; i<mbs; i++ ) { 1201 n = ii[1] - ii[0]; ii++; 1202 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; 1203 for ( j=0; j<n; j++ ) { 1204 xb = x + 4*(*idx++); 1205 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1206 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 1207 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 1208 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 1209 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 1210 v += 16; 1211 } 1212 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 1213 z += 4; y += 4; 1214 } 1215 VecRestoreArray_Fast(xx,x); 1216 VecRestoreArray_Fast(yy,y); 1217 VecRestoreArray_Fast(zz,z); 1218 PLogFlops(32*a->nz); 1219 return 0; 1220 } 1221 1222 #undef __FUNC__ 1223 #define __FUNC__ "MatMultAdd_SeqBAIJ_5" 1224 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 1225 { 1226 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1227 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 1228 register Scalar x1,x2,x3,x4,x5; 1229 int mbs=a->mbs,i,*idx,*ii,j,n; 1230 1231 VecGetArray_Fast(xx,x); 1232 VecGetArray_Fast(yy,y); 1233 VecGetArray_Fast(zz,z); 1234 1235 idx = a->j; 1236 v = a->a; 1237 ii = a->i; 1238 1239 for ( i=0; i<mbs; i++ ) { 1240 n = ii[1] - ii[0]; ii++; 1241 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; 1242 for ( j=0; j<n; j++ ) { 1243 xb = x + 5*(*idx++); 1244 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 1245 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1246 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1247 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1248 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1249 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 1250 v += 25; 1251 } 1252 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 1253 z += 5; y += 5; 1254 } 1255 VecRestoreArray_Fast(xx,x); 1256 VecRestoreArray_Fast(yy,y); 1257 VecRestoreArray_Fast(zz,z); 1258 PLogFlops(50*a->nz); 1259 return 0; 1260 } 1261 1262 #undef __FUNC__ 1263 #define __FUNC__ "MatMultAdd_SeqBAIJ_7" 1264 static int MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1265 { 1266 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1267 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 1268 register Scalar x1,x2,x3,x4,x5,x6,x7; 1269 int mbs=a->mbs,i,*idx,*ii,j,n; 1270 1271 VecGetArray_Fast(xx,x); 1272 VecGetArray_Fast(yy,y); 1273 VecGetArray_Fast(zz,z); 1274 1275 idx = a->j; 1276 v = a->a; 1277 ii = a->i; 1278 1279 for ( i=0; i<mbs; i++ ) { 1280 n = ii[1] - ii[0]; ii++; 1281 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6]; 1282 for ( j=0; j<n; j++ ) { 1283 xb = x + 7*(*idx++); 1284 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 1285 sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 1286 sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 1287 sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 1288 sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 1289 sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 1290 sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 1291 sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 1292 v += 49; 1293 } 1294 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 1295 z += 7; y += 7; 1296 } 1297 VecRestoreArray_Fast(xx,x); 1298 VecRestoreArray_Fast(yy,y); 1299 VecRestoreArray_Fast(zz,z); 1300 PLogFlops(98*a->nz); 1301 return 0; 1302 } 1303 1304 1305 #undef __FUNC__ 1306 #define __FUNC__ "MatMultAdd_SeqBAIJ_N" 1307 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 1308 { 1309 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1310 register Scalar *x,*z,*v,*xb; 1311 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr; 1312 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1313 1314 if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1315 1316 VecGetArray_Fast(xx,x); 1317 VecGetArray_Fast(zz,z); 1318 1319 idx = a->j; 1320 v = a->a; 1321 ii = a->i; 1322 1323 1324 if (!a->mult_work) { 1325 k = PetscMax(a->m,a->n); 1326 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work); 1327 } 1328 work = a->mult_work; 1329 for ( i=0; i<mbs; i++ ) { 1330 n = ii[1] - ii[0]; ii++; 1331 ncols = n*bs; 1332 workt = work; 1333 for ( j=0; j<n; j++ ) { 1334 xb = x + bs*(*idx++); 1335 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1336 workt += bs; 1337 } 1338 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); 1339 v += n*bs2; 1340 z += bs; 1341 } 1342 VecRestoreArray_Fast(xx,x); 1343 VecRestoreArray_Fast(zz,z); 1344 PLogFlops(2*a->nz*bs2 ); 1345 return 0; 1346 } 1347 1348 #undef __FUNC__ 1349 #define __FUNC__ "MatMultTrans_SeqBAIJ" 1350 static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz) 1351 { 1352 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1353 Scalar *xg,*zg,*zb; 1354 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1355 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1356 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1357 1358 1359 VecGetArray_Fast(xx,xg); x = xg; 1360 VecGetArray_Fast(zz,zg); z = zg; 1361 PetscMemzero(z,N*sizeof(Scalar)); 1362 1363 idx = a->j; 1364 v = a->a; 1365 ii = a->i; 1366 1367 switch (bs) { 1368 case 1: 1369 for ( i=0; i<mbs; i++ ) { 1370 n = ii[1] - ii[0]; ii++; 1371 xb = x + i; x1 = xb[0]; 1372 ib = idx + ai[i]; 1373 for ( j=0; j<n; j++ ) { 1374 rval = ib[j]; 1375 z[rval] += *v++ * x1; 1376 } 1377 } 1378 break; 1379 case 2: 1380 for ( i=0; i<mbs; i++ ) { 1381 n = ii[1] - ii[0]; ii++; 1382 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1383 ib = idx + ai[i]; 1384 for ( j=0; j<n; j++ ) { 1385 rval = ib[j]*2; 1386 z[rval++] += v[0]*x1 + v[1]*x2; 1387 z[rval++] += v[2]*x1 + v[3]*x2; 1388 v += 4; 1389 } 1390 } 1391 break; 1392 case 3: 1393 for ( i=0; i<mbs; i++ ) { 1394 n = ii[1] - ii[0]; ii++; 1395 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1396 ib = idx + ai[i]; 1397 for ( j=0; j<n; j++ ) { 1398 rval = ib[j]*3; 1399 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1400 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1401 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1402 v += 9; 1403 } 1404 } 1405 break; 1406 case 4: 1407 for ( i=0; i<mbs; i++ ) { 1408 n = ii[1] - ii[0]; ii++; 1409 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1410 ib = idx + ai[i]; 1411 for ( j=0; j<n; j++ ) { 1412 rval = ib[j]*4; 1413 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1414 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1415 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1416 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1417 v += 16; 1418 } 1419 } 1420 break; 1421 case 5: 1422 for ( i=0; i<mbs; i++ ) { 1423 n = ii[1] - ii[0]; ii++; 1424 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1425 x4 = xb[3]; x5 = xb[4]; 1426 ib = idx + ai[i]; 1427 for ( j=0; j<n; j++ ) { 1428 rval = ib[j]*5; 1429 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1430 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1431 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1432 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1433 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1434 v += 25; 1435 } 1436 } 1437 break; 1438 /* block sizes larger then 5 by 5 are handled by BLAS */ 1439 default: { 1440 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1441 if (!a->mult_work) { 1442 k = PetscMax(a->m,a->n); 1443 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1444 CHKPTRQ(a->mult_work); 1445 } 1446 work = a->mult_work; 1447 for ( i=0; i<mbs; i++ ) { 1448 n = ii[1] - ii[0]; ii++; 1449 ncols = n*bs; 1450 PetscMemzero(work,ncols*sizeof(Scalar)); 1451 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1452 v += n*bs2; 1453 x += bs; 1454 workt = work; 1455 for ( j=0; j<n; j++ ) { 1456 zb = z + bs*(*idx++); 1457 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1458 workt += bs; 1459 } 1460 } 1461 } 1462 } 1463 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1464 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1465 PLogFlops(2*a->nz*a->bs2 - a->n); 1466 return 0; 1467 } 1468 1469 #undef __FUNC__ 1470 #define __FUNC__ "MatMultTransAdd_SeqBAIJ" 1471 static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1472 { 1473 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1474 Scalar *xg,*zg,*zb; 1475 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1476 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1477 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1478 1479 1480 1481 VecGetArray_Fast(xx,xg); x = xg; 1482 VecGetArray_Fast(zz,zg); z = zg; 1483 1484 if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1485 else PetscMemzero(z,N*sizeof(Scalar)); 1486 1487 1488 idx = a->j; 1489 v = a->a; 1490 ii = a->i; 1491 1492 switch (bs) { 1493 case 1: 1494 for ( i=0; i<mbs; i++ ) { 1495 n = ii[1] - ii[0]; ii++; 1496 xb = x + i; x1 = xb[0]; 1497 ib = idx + ai[i]; 1498 for ( j=0; j<n; j++ ) { 1499 rval = ib[j]; 1500 z[rval] += *v++ * x1; 1501 } 1502 } 1503 break; 1504 case 2: 1505 for ( i=0; i<mbs; i++ ) { 1506 n = ii[1] - ii[0]; ii++; 1507 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1508 ib = idx + ai[i]; 1509 for ( j=0; j<n; j++ ) { 1510 rval = ib[j]*2; 1511 z[rval++] += v[0]*x1 + v[1]*x2; 1512 z[rval++] += v[2]*x1 + v[3]*x2; 1513 v += 4; 1514 } 1515 } 1516 break; 1517 case 3: 1518 for ( i=0; i<mbs; i++ ) { 1519 n = ii[1] - ii[0]; ii++; 1520 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1521 ib = idx + ai[i]; 1522 for ( j=0; j<n; j++ ) { 1523 rval = ib[j]*3; 1524 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1525 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1526 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1527 v += 9; 1528 } 1529 } 1530 break; 1531 case 4: 1532 for ( i=0; i<mbs; i++ ) { 1533 n = ii[1] - ii[0]; ii++; 1534 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1535 ib = idx + ai[i]; 1536 for ( j=0; j<n; j++ ) { 1537 rval = ib[j]*4; 1538 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1539 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1540 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1541 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1542 v += 16; 1543 } 1544 } 1545 break; 1546 case 5: 1547 for ( i=0; i<mbs; i++ ) { 1548 n = ii[1] - ii[0]; ii++; 1549 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1550 x4 = xb[3]; x5 = xb[4]; 1551 ib = idx + ai[i]; 1552 for ( j=0; j<n; j++ ) { 1553 rval = ib[j]*5; 1554 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1555 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1556 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1557 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1558 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1559 v += 25; 1560 } 1561 } 1562 break; 1563 /* block sizes larger then 5 by 5 are handled by BLAS */ 1564 default: { 1565 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1566 if (!a->mult_work) { 1567 k = PetscMax(a->m,a->n); 1568 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1569 CHKPTRQ(a->mult_work); 1570 } 1571 work = a->mult_work; 1572 for ( i=0; i<mbs; i++ ) { 1573 n = ii[1] - ii[0]; ii++; 1574 ncols = n*bs; 1575 PetscMemzero(work,ncols*sizeof(Scalar)); 1576 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1577 v += n*bs2; 1578 x += bs; 1579 workt = work; 1580 for ( j=0; j<n; j++ ) { 1581 zb = z + bs*(*idx++); 1582 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1583 workt += bs; 1584 } 1585 } 1586 } 1587 } 1588 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1589 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1590 PLogFlops(2*a->nz*a->bs2); 1591 return 0; 1592 } 1593 1594 #undef __FUNC__ 1595 #define __FUNC__ "MatGetInfo_SeqBAIJ" 1596 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info) 1597 { 1598 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1599 1600 info->rows_global = (double)a->m; 1601 info->columns_global = (double)a->n; 1602 info->rows_local = (double)a->m; 1603 info->columns_local = (double)a->n; 1604 info->block_size = a->bs2; 1605 info->nz_allocated = a->maxnz; 1606 info->nz_used = a->bs2*a->nz; 1607 info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 1608 /* if (info->nz_unneeded != A->info.nz_unneeded) 1609 printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */ 1610 info->assemblies = A->num_ass; 1611 info->mallocs = a->reallocs; 1612 info->memory = A->mem; 1613 if (A->factor) { 1614 info->fill_ratio_given = A->info.fill_ratio_given; 1615 info->fill_ratio_needed = A->info.fill_ratio_needed; 1616 info->factor_mallocs = A->info.factor_mallocs; 1617 } else { 1618 info->fill_ratio_given = 0; 1619 info->fill_ratio_needed = 0; 1620 info->factor_mallocs = 0; 1621 } 1622 return 0; 1623 } 1624 1625 #undef __FUNC__ 1626 #define __FUNC__ "MatEqual_SeqBAIJ" 1627 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 1628 { 1629 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 1630 1631 if (B->type !=MATSEQBAIJ)SETERRQ(1,0,"Matrices must be same type"); 1632 1633 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1634 if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 1635 (a->nz != b->nz)) { 1636 *flg = PETSC_FALSE; return 0; 1637 } 1638 1639 /* if the a->i are the same */ 1640 if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 1641 *flg = PETSC_FALSE; return 0; 1642 } 1643 1644 /* if a->j are the same */ 1645 if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 1646 *flg = PETSC_FALSE; return 0; 1647 } 1648 1649 /* if a->a are the same */ 1650 if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 1651 *flg = PETSC_FALSE; return 0; 1652 } 1653 *flg = PETSC_TRUE; 1654 return 0; 1655 1656 } 1657 1658 #undef __FUNC__ 1659 #define __FUNC__ "MatGetDiagonal_SeqBAIJ" 1660 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 1661 { 1662 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1663 int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 1664 Scalar *x, zero = 0.0,*aa,*aa_j; 1665 1666 if (A->factor) SETERRQ(1,0,"Not for factored matrix"); 1667 bs = a->bs; 1668 aa = a->a; 1669 ai = a->i; 1670 aj = a->j; 1671 ambs = a->mbs; 1672 bs2 = a->bs2; 1673 1674 VecSet(&zero,v); 1675 VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n); 1676 if (n != a->m) SETERRQ(1,0,"Nonconforming matrix and vector"); 1677 for ( i=0; i<ambs; i++ ) { 1678 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1679 if (aj[j] == i) { 1680 row = i*bs; 1681 aa_j = aa+j*bs2; 1682 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 1683 break; 1684 } 1685 } 1686 } 1687 return 0; 1688 } 1689 1690 #undef __FUNC__ 1691 #define __FUNC__ "MatDiagonalScale_SeqBAIJ" 1692 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 1693 { 1694 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1695 Scalar *l,*r,x,*v,*aa,*li,*ri; 1696 int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 1697 1698 ai = a->i; 1699 aj = a->j; 1700 aa = a->a; 1701 m = a->m; 1702 n = a->n; 1703 bs = a->bs; 1704 mbs = a->mbs; 1705 bs2 = a->bs2; 1706 if (ll) { 1707 VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm); 1708 if (lm != m) SETERRQ(1,0,"Left scaling vector wrong length"); 1709 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1710 M = ai[i+1] - ai[i]; 1711 li = l + i*bs; 1712 v = aa + bs2*ai[i]; 1713 for ( j=0; j<M; j++ ) { /* for each block */ 1714 for ( k=0; k<bs2; k++ ) { 1715 (*v++) *= li[k%bs]; 1716 } 1717 } 1718 } 1719 } 1720 1721 if (rr) { 1722 VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn); 1723 if (rn != n) SETERRQ(1,0,"Right scaling vector wrong length"); 1724 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1725 M = ai[i+1] - ai[i]; 1726 v = aa + bs2*ai[i]; 1727 for ( j=0; j<M; j++ ) { /* for each block */ 1728 ri = r + bs*aj[ai[i]+j]; 1729 for ( k=0; k<bs; k++ ) { 1730 x = ri[k]; 1731 for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 1732 } 1733 } 1734 } 1735 } 1736 return 0; 1737 } 1738 1739 1740 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 1741 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 1742 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1743 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*); 1744 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**); 1745 1746 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 1747 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 1748 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 1749 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 1750 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 1751 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 1752 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec); 1753 1754 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 1755 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 1756 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 1757 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 1758 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 1759 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 1760 1761 #undef __FUNC__ 1762 #define __FUNC__ "MatNorm_SeqBAIJ" 1763 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 1764 { 1765 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1766 Scalar *v = a->a; 1767 double sum = 0.0; 1768 int i,nz=a->nz,bs2=a->bs2; 1769 1770 if (type == NORM_FROBENIUS) { 1771 for (i=0; i< bs2*nz; i++ ) { 1772 #if defined(PETSC_COMPLEX) 1773 sum += real(conj(*v)*(*v)); v++; 1774 #else 1775 sum += (*v)*(*v); v++; 1776 #endif 1777 } 1778 *norm = sqrt(sum); 1779 } 1780 else { 1781 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 1782 } 1783 return 0; 1784 } 1785 1786 /* 1787 note: This can only work for identity for row and col. It would 1788 be good to check this and otherwise generate an error. 1789 */ 1790 #undef __FUNC__ 1791 #define __FUNC__ "MatILUFactor_SeqBAIJ" 1792 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 1793 { 1794 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1795 Mat outA; 1796 int ierr; 1797 1798 if (fill != 0) SETERRQ(1,0,"Only fill=0 supported"); 1799 1800 outA = inA; 1801 inA->factor = FACTOR_LU; 1802 a->row = row; 1803 a->col = col; 1804 1805 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1806 1807 if (!a->diag) { 1808 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 1809 } 1810 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 1811 return 0; 1812 } 1813 1814 #undef __FUNC__ 1815 #define __FUNC__ "MatScale_SeqBAIJ" 1816 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 1817 { 1818 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1819 int one = 1, totalnz = a->bs2*a->nz; 1820 BLscal_( &totalnz, alpha, a->a, &one ); 1821 PLogFlops(totalnz); 1822 return 0; 1823 } 1824 1825 #undef __FUNC__ 1826 #define __FUNC__ "MatGetValues_SeqBAIJ" 1827 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 1828 { 1829 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1830 int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 1831 int *ai = a->i, *ailen = a->ilen; 1832 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 1833 Scalar *ap, *aa = a->a, zero = 0.0; 1834 1835 for ( k=0; k<m; k++ ) { /* loop over rows */ 1836 row = im[k]; brow = row/bs; 1837 if (row < 0) SETERRQ(1,0,"Negative row"); 1838 if (row >= a->m) SETERRQ(1,0,"Row too large"); 1839 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1840 nrow = ailen[brow]; 1841 for ( l=0; l<n; l++ ) { /* loop over columns */ 1842 if (in[l] < 0) SETERRQ(1,0,"Negative column"); 1843 if (in[l] >= a->n) SETERRQ(1,0,"Column too large"); 1844 col = in[l] ; 1845 bcol = col/bs; 1846 cidx = col%bs; 1847 ridx = row%bs; 1848 high = nrow; 1849 low = 0; /* assume unsorted */ 1850 while (high-low > 5) { 1851 t = (low+high)/2; 1852 if (rp[t] > bcol) high = t; 1853 else low = t; 1854 } 1855 for ( i=low; i<high; i++ ) { 1856 if (rp[i] > bcol) break; 1857 if (rp[i] == bcol) { 1858 *v++ = ap[bs2*i+bs*cidx+ridx]; 1859 goto finished; 1860 } 1861 } 1862 *v++ = zero; 1863 finished:; 1864 } 1865 } 1866 return 0; 1867 } 1868 1869 #undef __FUNC__ 1870 #define __FUNC__ "MatGetBlockSize_SeqBAIJ" 1871 static int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs) 1872 { 1873 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data; 1874 *bs = baij->bs; 1875 return 0; 1876 } 1877 1878 /* idx should be of length atlease bs */ 1879 #undef __FUNC__ 1880 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Block" 1881 static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg) 1882 { 1883 int i,row; 1884 row = idx[0]; 1885 if (row%bs!=0) { *flg = PETSC_FALSE; return 0; } 1886 1887 for ( i=1; i<bs; i++ ) { 1888 if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; } 1889 } 1890 *flg = PETSC_TRUE; 1891 return 0; 1892 } 1893 1894 #undef __FUNC__ 1895 #define __FUNC__ "MatZeroRows_SeqBAIJ" 1896 static int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag) 1897 { 1898 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1899 IS is_local; 1900 int ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2; 1901 PetscTruth flg; 1902 Scalar zero = 0.0,*aa; 1903 1904 /* Make a copy of the IS and sort it */ 1905 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 1906 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1907 ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr); 1908 ierr = ISSort(is_local); CHKERRQ(ierr); 1909 ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr); 1910 1911 i = 0; 1912 while (i < is_n) { 1913 if (rows[i]<0 || rows[i]>m) SETERRQ(1,0,"row out of range"); 1914 flg = PETSC_FALSE; 1915 if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); } 1916 if (flg) { /* There exists a block of rows to be Zerowed */ 1917 baij->ilen[rows[i]/bs] = 0; 1918 i += bs; 1919 } else { /* Zero out only the requested row */ 1920 count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs; 1921 aa = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs); 1922 for ( j=0; j<count; j++ ) { 1923 aa[0] = zero; 1924 aa+=bs; 1925 } 1926 i++; 1927 } 1928 } 1929 if (diag) { 1930 for ( j=0; j<is_n; j++ ) { 1931 ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1932 } 1933 } 1934 ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr); 1935 ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr); 1936 ierr = ISDestroy(is_local); CHKERRQ(ierr); 1937 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1938 1939 return 0; 1940 } 1941 1942 #undef __FUNC__ 1943 #define __FUNC__ "MatPrintHelp_SeqBAIJ" 1944 int MatPrintHelp_SeqBAIJ(Mat A) 1945 { 1946 static int called = 0; 1947 MPI_Comm comm = A->comm; 1948 1949 if (called) return 0; else called = 1; 1950 PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n"); 1951 PetscPrintf(comm," -mat_block_size <block_size>\n"); 1952 return 0; 1953 } 1954 1955 /* -------------------------------------------------------------------*/ 1956 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 1957 MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 1958 MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N, 1959 MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ, 1960 MatSolve_SeqBAIJ_N,0, 1961 0,0, 1962 MatLUFactor_SeqBAIJ,0, 1963 0, 1964 MatTranspose_SeqBAIJ, 1965 MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 1966 MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 1967 0,MatAssemblyEnd_SeqBAIJ, 1968 0, 1969 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ, 1970 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 1971 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 1972 MatILUFactorSymbolic_SeqBAIJ,0, 1973 0,0, 1974 MatConvertSameType_SeqBAIJ,0,0, 1975 MatILUFactor_SeqBAIJ,0,0, 1976 MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ, 1977 MatGetValues_SeqBAIJ,0, 1978 MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 1979 0,0,0,MatGetBlockSize_SeqBAIJ, 1980 MatGetRowIJ_SeqBAIJ, 1981 MatRestoreRowIJ_SeqBAIJ, 1982 0,0,0,0,0,0, 1983 MatSetValuesBlocked_SeqBAIJ}; 1984 1985 #undef __FUNC__ 1986 #define __FUNC__ "MatCreateSeqBAIJ" 1987 /*@C 1988 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1989 compressed row) format. For good matrix assembly performance the 1990 user should preallocate the matrix storage by setting the parameter nz 1991 (or the array nzz). By setting these parameters accurately, performance 1992 during matrix assembly can be increased by more than a factor of 50. 1993 1994 Input Parameters: 1995 . comm - MPI communicator, set to MPI_COMM_SELF 1996 . bs - size of block 1997 . m - number of rows 1998 . n - number of columns 1999 . nz - number of block nonzeros per block row (same for all rows) 2000 . nzz - array containing the number of block nonzeros in the various block rows 2001 (possibly different for each block row) or PETSC_NULL 2002 2003 Output Parameter: 2004 . A - the matrix 2005 2006 Options Database Keys: 2007 $ -mat_no_unroll - uses code that does not unroll the loops in the 2008 $ block calculations (much solver) 2009 $ -mat_block_size - size of the blocks to use 2010 2011 Notes: 2012 The block AIJ format is fully compatible with standard Fortran 77 2013 storage. That is, the stored row and column indices can begin at 2014 either one (as in Fortran) or zero. See the users' manual for details. 2015 2016 Specify the preallocated storage with either nz or nnz (not both). 2017 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2018 allocation. For additional details, see the users manual chapter on 2019 matrices. 2020 2021 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2022 @*/ 2023 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 2024 { 2025 Mat B; 2026 Mat_SeqBAIJ *b; 2027 int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size; 2028 2029 MPI_Comm_size(comm,&size); 2030 if (size > 1) SETERRQ(1,0,"Comm must be of size 1"); 2031 2032 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr); 2033 2034 if (mbs*bs!=m || nbs*bs!=n) 2035 SETERRQ(1,0,"Number rows, cols must be divisible by blocksize"); 2036 2037 *A = 0; 2038 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 2039 PLogObjectCreate(B); 2040 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 2041 PetscMemzero(b,sizeof(Mat_SeqBAIJ)); 2042 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 2043 ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr); 2044 if (!flg) { 2045 switch (bs) { 2046 case 1: 2047 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2048 B->ops.solve = MatSolve_SeqBAIJ_1; 2049 B->ops.mult = MatMult_SeqBAIJ_1; 2050 B->ops.multadd = MatMultAdd_SeqBAIJ_1; 2051 break; 2052 case 2: 2053 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2054 B->ops.solve = MatSolve_SeqBAIJ_2; 2055 B->ops.mult = MatMult_SeqBAIJ_2; 2056 B->ops.multadd = MatMultAdd_SeqBAIJ_2; 2057 break; 2058 case 3: 2059 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2060 B->ops.solve = MatSolve_SeqBAIJ_3; 2061 B->ops.mult = MatMult_SeqBAIJ_3; 2062 B->ops.multadd = MatMultAdd_SeqBAIJ_3; 2063 break; 2064 case 4: 2065 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2066 B->ops.solve = MatSolve_SeqBAIJ_4; 2067 B->ops.mult = MatMult_SeqBAIJ_4; 2068 B->ops.multadd = MatMultAdd_SeqBAIJ_4; 2069 break; 2070 case 5: 2071 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2072 B->ops.solve = MatSolve_SeqBAIJ_5; 2073 B->ops.mult = MatMult_SeqBAIJ_5; 2074 B->ops.multadd = MatMultAdd_SeqBAIJ_5; 2075 break; 2076 case 7: 2077 B->ops.mult = MatMult_SeqBAIJ_7; 2078 B->ops.solve = MatSolve_SeqBAIJ_7; 2079 B->ops.multadd = MatMultAdd_SeqBAIJ_7; 2080 break; 2081 } 2082 } 2083 B->destroy = MatDestroy_SeqBAIJ; 2084 B->view = MatView_SeqBAIJ; 2085 B->factor = 0; 2086 B->lupivotthreshold = 1.0; 2087 B->mapping = 0; 2088 b->row = 0; 2089 b->col = 0; 2090 b->reallocs = 0; 2091 2092 b->m = m; B->m = m; B->M = m; 2093 b->n = n; B->n = n; B->N = n; 2094 b->mbs = mbs; 2095 b->nbs = nbs; 2096 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 2097 if (nnz == PETSC_NULL) { 2098 if (nz == PETSC_DEFAULT) nz = 5; 2099 else if (nz <= 0) nz = 1; 2100 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 2101 nz = nz*mbs; 2102 } 2103 else { 2104 nz = 0; 2105 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 2106 } 2107 2108 /* allocate the matrix space */ 2109 len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 2110 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 2111 PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 2112 b->j = (int *) (b->a + nz*bs2); 2113 PetscMemzero(b->j,nz*sizeof(int)); 2114 b->i = b->j + nz; 2115 b->singlemalloc = 1; 2116 2117 b->i[0] = 0; 2118 for (i=1; i<mbs+1; i++) { 2119 b->i[i] = b->i[i-1] + b->imax[i-1]; 2120 } 2121 2122 /* b->ilen will count nonzeros in each block row so far. */ 2123 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 2124 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 2125 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 2126 2127 b->bs = bs; 2128 b->bs2 = bs2; 2129 b->mbs = mbs; 2130 b->nz = 0; 2131 b->maxnz = nz*bs2; 2132 b->sorted = 0; 2133 b->roworiented = 1; 2134 b->nonew = 0; 2135 b->diag = 0; 2136 b->solve_work = 0; 2137 b->mult_work = 0; 2138 b->spptr = 0; 2139 B->info.nz_unneeded = (double)b->maxnz; 2140 2141 *A = B; 2142 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 2143 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 2144 return 0; 2145 } 2146 2147 #undef __FUNC__ 2148 #define __FUNC__ "MatConvertSameType_SeqBAIJ" 2149 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 2150 { 2151 Mat C; 2152 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 2153 int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 2154 2155 if (a->i[mbs] != nz) SETERRQ(1,0,"Corrupt matrix"); 2156 2157 *B = 0; 2158 PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 2159 PLogObjectCreate(C); 2160 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 2161 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 2162 C->destroy = MatDestroy_SeqBAIJ; 2163 C->view = MatView_SeqBAIJ; 2164 C->factor = A->factor; 2165 c->row = 0; 2166 c->col = 0; 2167 C->assembled = PETSC_TRUE; 2168 2169 c->m = C->m = a->m; 2170 c->n = C->n = a->n; 2171 C->M = a->m; 2172 C->N = a->n; 2173 2174 c->bs = a->bs; 2175 c->bs2 = a->bs2; 2176 c->mbs = a->mbs; 2177 c->nbs = a->nbs; 2178 2179 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 2180 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 2181 for ( i=0; i<mbs; i++ ) { 2182 c->imax[i] = a->imax[i]; 2183 c->ilen[i] = a->ilen[i]; 2184 } 2185 2186 /* allocate the matrix space */ 2187 c->singlemalloc = 1; 2188 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 2189 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 2190 c->j = (int *) (c->a + nz*bs2); 2191 c->i = c->j + nz; 2192 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 2193 if (mbs > 0) { 2194 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 2195 if (cpvalues == COPY_VALUES) { 2196 PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 2197 } 2198 } 2199 2200 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 2201 c->sorted = a->sorted; 2202 c->roworiented = a->roworiented; 2203 c->nonew = a->nonew; 2204 2205 if (a->diag) { 2206 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 2207 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 2208 for ( i=0; i<mbs; i++ ) { 2209 c->diag[i] = a->diag[i]; 2210 } 2211 } 2212 else c->diag = 0; 2213 c->nz = a->nz; 2214 c->maxnz = a->maxnz; 2215 c->solve_work = 0; 2216 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 2217 c->mult_work = 0; 2218 *B = C; 2219 return 0; 2220 } 2221 2222 #undef __FUNC__ 2223 #define __FUNC__ "MatLoad_SeqBAIJ" 2224 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 2225 { 2226 Mat_SeqBAIJ *a; 2227 Mat B; 2228 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 2229 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2230 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 2231 int *masked, nmask,tmp,bs2,ishift; 2232 Scalar *aa; 2233 MPI_Comm comm = ((PetscObject) viewer)->comm; 2234 2235 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2236 bs2 = bs*bs; 2237 2238 MPI_Comm_size(comm,&size); 2239 if (size > 1) SETERRQ(1,0,"view must have one processor"); 2240 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2241 ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 2242 if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not Mat object"); 2243 M = header[1]; N = header[2]; nz = header[3]; 2244 2245 if (M != N) SETERRQ(1,0,"Can only do square matrices"); 2246 2247 /* 2248 This code adds extra rows to make sure the number of rows is 2249 divisible by the blocksize 2250 */ 2251 mbs = M/bs; 2252 extra_rows = bs - M + bs*(mbs); 2253 if (extra_rows == bs) extra_rows = 0; 2254 else mbs++; 2255 if (extra_rows) { 2256 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 2257 } 2258 2259 /* read in row lengths */ 2260 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 2261 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 2262 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 2263 2264 /* read in column indices */ 2265 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 2266 ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 2267 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 2268 2269 /* loop over row lengths determining block row lengths */ 2270 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 2271 PetscMemzero(browlengths,mbs*sizeof(int)); 2272 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 2273 PetscMemzero(mask,mbs*sizeof(int)); 2274 masked = mask + mbs; 2275 rowcount = 0; nzcount = 0; 2276 for ( i=0; i<mbs; i++ ) { 2277 nmask = 0; 2278 for ( j=0; j<bs; j++ ) { 2279 kmax = rowlengths[rowcount]; 2280 for ( k=0; k<kmax; k++ ) { 2281 tmp = jj[nzcount++]/bs; 2282 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2283 } 2284 rowcount++; 2285 } 2286 browlengths[i] += nmask; 2287 /* zero out the mask elements we set */ 2288 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2289 } 2290 2291 /* create our matrix */ 2292 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 2293 CHKERRQ(ierr); 2294 B = *A; 2295 a = (Mat_SeqBAIJ *) B->data; 2296 2297 /* set matrix "i" values */ 2298 a->i[0] = 0; 2299 for ( i=1; i<= mbs; i++ ) { 2300 a->i[i] = a->i[i-1] + browlengths[i-1]; 2301 a->ilen[i-1] = browlengths[i-1]; 2302 } 2303 a->nz = 0; 2304 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 2305 2306 /* read in nonzero values */ 2307 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 2308 ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 2309 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 2310 2311 /* set "a" and "j" values into matrix */ 2312 nzcount = 0; jcount = 0; 2313 for ( i=0; i<mbs; i++ ) { 2314 nzcountb = nzcount; 2315 nmask = 0; 2316 for ( j=0; j<bs; j++ ) { 2317 kmax = rowlengths[i*bs+j]; 2318 for ( k=0; k<kmax; k++ ) { 2319 tmp = jj[nzcount++]/bs; 2320 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2321 } 2322 rowcount++; 2323 } 2324 /* sort the masked values */ 2325 PetscSortInt(nmask,masked); 2326 2327 /* set "j" values into matrix */ 2328 maskcount = 1; 2329 for ( j=0; j<nmask; j++ ) { 2330 a->j[jcount++] = masked[j]; 2331 mask[masked[j]] = maskcount++; 2332 } 2333 /* set "a" values into matrix */ 2334 ishift = bs2*a->i[i]; 2335 for ( j=0; j<bs; j++ ) { 2336 kmax = rowlengths[i*bs+j]; 2337 for ( k=0; k<kmax; k++ ) { 2338 tmp = jj[nzcountb]/bs ; 2339 block = mask[tmp] - 1; 2340 point = jj[nzcountb] - bs*tmp; 2341 idx = ishift + bs2*block + j + bs*point; 2342 a->a[idx] = aa[nzcountb++]; 2343 } 2344 } 2345 /* zero out the mask elements we set */ 2346 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2347 } 2348 if (jcount != a->nz) SETERRQ(1,0,"Error bad binary matrix"); 2349 2350 PetscFree(rowlengths); 2351 PetscFree(browlengths); 2352 PetscFree(aa); 2353 PetscFree(jj); 2354 PetscFree(mask); 2355 2356 B->assembled = PETSC_TRUE; 2357 2358 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 2359 if (flg) { 2360 Viewer tviewer; 2361 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2362 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO,0);CHKERRQ(ierr); 2363 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2364 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2365 } 2366 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 2367 if (flg) { 2368 Viewer tviewer; 2369 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2370 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO_LONG,0);CHKERRQ(ierr); 2371 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2372 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2373 } 2374 ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 2375 if (flg) { 2376 Viewer tviewer; 2377 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2378 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2379 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2380 } 2381 ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 2382 if (flg) { 2383 Viewer tviewer; 2384 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2385 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_MATLAB,"M");CHKERRQ(ierr); 2386 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2387 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2388 } 2389 ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 2390 if (flg) { 2391 Viewer tviewer; 2392 ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 2393 ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 2394 ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 2395 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2396 } 2397 return 0; 2398 } 2399 2400 2401 2402