1 #ifndef lint 2 static char vcid[] = "$Id: baij.c,v 1.88 1997/02/04 01:21:20 balay Exp balay $"; 3 #endif 4 5 /* 6 Defines the basic matrix operations for the BAIJ (compressed row) 7 matrix storage format. 8 */ 9 #include "src/mat/impls/baij/seq/baij.h" 10 #include "src/vec/vecimpl.h" 11 #include "src/inline/spops.h" 12 #include "petsc.h" 13 14 15 /* 16 Adds diagonal pointers to sparse matrix structure. 17 */ 18 19 #undef __FUNC__ 20 #define __FUNC__ "MatMarkDiag_SeqBAIJ" 21 int MatMarkDiag_SeqBAIJ(Mat A) 22 { 23 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 24 int i,j, *diag, m = a->mbs; 25 26 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 27 PLogObjectMemory(A,(m+1)*sizeof(int)); 28 for ( i=0; i<m; i++ ) { 29 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 30 if (a->j[j] == i) { 31 diag[i] = j; 32 break; 33 } 34 } 35 } 36 a->diag = diag; 37 return 0; 38 } 39 40 #include "draw.h" 41 #include "pinclude/pviewer.h" 42 #include "sys.h" 43 44 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 45 46 #undef __FUNC__ 47 #define __FUNC__ "MatGetRowIJ_SeqBAIJ" 48 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 49 PetscTruth *done) 50 { 51 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 52 int ierr, n = a->mbs,i; 53 54 *nn = n; 55 if (!ia) return 0; 56 if (symmetric) { 57 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 58 } else if (oshift == 1) { 59 /* temporarily add 1 to i and j indices */ 60 int nz = a->i[n] + 1; 61 for ( i=0; i<nz; i++ ) a->j[i]++; 62 for ( i=0; i<n+1; i++ ) a->i[i]++; 63 *ia = a->i; *ja = a->j; 64 } else { 65 *ia = a->i; *ja = a->j; 66 } 67 68 return 0; 69 } 70 71 #undef __FUNC__ 72 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ" 73 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 74 PetscTruth *done) 75 { 76 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 77 int i,n = a->mbs; 78 79 if (!ia) return 0; 80 if (symmetric) { 81 PetscFree(*ia); 82 PetscFree(*ja); 83 } else if (oshift == 1) { 84 int nz = a->i[n]; 85 for ( i=0; i<nz; i++ ) a->j[i]--; 86 for ( i=0; i<n+1; i++ ) a->i[i]--; 87 } 88 return 0; 89 } 90 91 92 #undef __FUNC__ 93 #define __FUNC__ "MatView_SeqBAIJ_Binary" 94 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 95 { 96 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 97 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 98 Scalar *aa; 99 100 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 101 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 102 col_lens[0] = MAT_COOKIE; 103 104 col_lens[1] = a->m; 105 col_lens[2] = a->n; 106 col_lens[3] = a->nz*bs2; 107 108 /* store lengths of each row and write (including header) to file */ 109 count = 0; 110 for ( i=0; i<a->mbs; i++ ) { 111 for ( j=0; j<bs; j++ ) { 112 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 113 } 114 } 115 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 116 PetscFree(col_lens); 117 118 /* store column indices (zero start index) */ 119 jj = (int *) PetscMalloc( a->nz*bs2*sizeof(int) ); CHKPTRQ(jj); 120 count = 0; 121 for ( i=0; i<a->mbs; i++ ) { 122 for ( j=0; j<bs; j++ ) { 123 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 124 for ( l=0; l<bs; l++ ) { 125 jj[count++] = bs*a->j[k] + l; 126 } 127 } 128 } 129 } 130 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr); 131 PetscFree(jj); 132 133 /* store nonzero values */ 134 aa = (Scalar *) PetscMalloc(a->nz*bs2*sizeof(Scalar)); CHKPTRQ(aa); 135 count = 0; 136 for ( i=0; i<a->mbs; i++ ) { 137 for ( j=0; j<bs; j++ ) { 138 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 139 for ( l=0; l<bs; l++ ) { 140 aa[count++] = a->a[bs2*k + l*bs + j]; 141 } 142 } 143 } 144 } 145 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 146 PetscFree(aa); 147 return 0; 148 } 149 150 #undef __FUNC__ 151 #define __FUNC__ "MatView_SeqBAIJ_ASCII" 152 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 153 { 154 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 155 int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 156 FILE *fd; 157 char *outputname; 158 159 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 160 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 161 ierr = ViewerGetFormat(viewer,&format); 162 if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) { 163 fprintf(fd," block size is %d\n",bs); 164 } 165 else if (format == VIEWER_FORMAT_ASCII_MATLAB) { 166 SETERRQ(1,0,"Matlab format not supported"); 167 } 168 else if (format == VIEWER_FORMAT_ASCII_COMMON) { 169 for ( i=0; i<a->mbs; i++ ) { 170 for ( j=0; j<bs; j++ ) { 171 fprintf(fd,"row %d:",i*bs+j); 172 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 173 for ( l=0; l<bs; l++ ) { 174 #if defined(PETSC_COMPLEX) 175 if (imag(a->a[bs2*k + l*bs + j]) != 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 176 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 177 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 178 else if (real(a->a[bs2*k + l*bs + j]) != 0.0) 179 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 180 #else 181 if (a->a[bs2*k + l*bs + j] != 0.0) 182 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 183 #endif 184 } 185 } 186 fprintf(fd,"\n"); 187 } 188 } 189 } 190 else { 191 for ( i=0; i<a->mbs; i++ ) { 192 for ( j=0; j<bs; j++ ) { 193 fprintf(fd,"row %d:",i*bs+j); 194 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 195 for ( l=0; l<bs; l++ ) { 196 #if defined(PETSC_COMPLEX) 197 if (imag(a->a[bs2*k + l*bs + j]) != 0.0) { 198 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 199 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 200 } 201 else { 202 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 203 } 204 #else 205 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 206 #endif 207 } 208 } 209 fprintf(fd,"\n"); 210 } 211 } 212 } 213 fflush(fd); 214 return 0; 215 } 216 217 #undef __FUNC__ 218 #define __FUNC__ "MatView_SeqBAIJ_Draw" 219 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer) 220 { 221 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 222 int row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2; 223 double xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r; 224 Scalar *aa; 225 Draw draw; 226 DrawButton button; 227 PetscTruth isnull; 228 229 ViewerDrawGetDraw(viewer,&draw); 230 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 231 232 xr = a->n; yr = a->m; h = yr/10.0; w = xr/10.0; 233 xr += w; yr += h; xl = -w; yl = -h; 234 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 235 /* loop over matrix elements drawing boxes */ 236 color = DRAW_BLUE; 237 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 238 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 239 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 240 x_l = a->j[j]*bs; x_r = x_l + 1.0; 241 aa = a->a + j*bs2; 242 for ( k=0; k<bs; k++ ) { 243 for ( l=0; l<bs; l++ ) { 244 if (PetscReal(*aa++) >= 0.) continue; 245 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 246 } 247 } 248 } 249 } 250 color = DRAW_CYAN; 251 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 252 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 253 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 254 x_l = a->j[j]*bs; x_r = x_l + 1.0; 255 aa = a->a + j*bs2; 256 for ( k=0; k<bs; k++ ) { 257 for ( l=0; l<bs; l++ ) { 258 if (PetscReal(*aa++) != 0.) continue; 259 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 260 } 261 } 262 } 263 } 264 265 color = DRAW_RED; 266 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 267 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 268 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 269 x_l = a->j[j]*bs; x_r = x_l + 1.0; 270 aa = a->a + j*bs2; 271 for ( k=0; k<bs; k++ ) { 272 for ( l=0; l<bs; l++ ) { 273 if (PetscReal(*aa++) <= 0.) continue; 274 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 275 } 276 } 277 } 278 } 279 280 DrawFlush(draw); 281 DrawGetPause(draw,&pause); 282 if (pause >= 0) { PetscSleep(pause); return 0;} 283 284 /* allow the matrix to zoom or shrink */ 285 ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 286 while (button != BUTTON_RIGHT) { 287 DrawClear(draw); 288 if (button == BUTTON_LEFT) scale = .5; 289 else if (button == BUTTON_CENTER) scale = 2.; 290 xl = scale*(xl + w - xc) + xc - w*scale; 291 xr = scale*(xr - w - xc) + xc + w*scale; 292 yl = scale*(yl + h - yc) + yc - h*scale; 293 yr = scale*(yr - h - yc) + yc + h*scale; 294 w *= scale; h *= scale; 295 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 296 color = DRAW_BLUE; 297 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 298 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 299 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 300 x_l = a->j[j]*bs; x_r = x_l + 1.0; 301 aa = a->a + j*bs2; 302 for ( k=0; k<bs; k++ ) { 303 for ( l=0; l<bs; l++ ) { 304 if (PetscReal(*aa++) >= 0.) continue; 305 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 306 } 307 } 308 } 309 } 310 color = DRAW_CYAN; 311 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 312 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 313 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 314 x_l = a->j[j]*bs; x_r = x_l + 1.0; 315 aa = a->a + j*bs2; 316 for ( k=0; k<bs; k++ ) { 317 for ( l=0; l<bs; l++ ) { 318 if (PetscReal(*aa++) != 0.) continue; 319 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 320 } 321 } 322 } 323 } 324 325 color = DRAW_RED; 326 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 327 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 328 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 329 x_l = a->j[j]*bs; x_r = x_l + 1.0; 330 aa = a->a + j*bs2; 331 for ( k=0; k<bs; k++ ) { 332 for ( l=0; l<bs; l++ ) { 333 if (PetscReal(*aa++) <= 0.) continue; 334 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 335 } 336 } 337 } 338 } 339 ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 340 } 341 return 0; 342 } 343 344 #undef __FUNC__ 345 #define __FUNC__ "MatView_SeqBAIJ" 346 static int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 347 { 348 Mat A = (Mat) obj; 349 ViewerType vtype; 350 int ierr; 351 352 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 353 if (vtype == MATLAB_VIEWER) { 354 SETERRQ(1,0,"Matlab viewer not supported"); 355 } 356 else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 357 return MatView_SeqBAIJ_ASCII(A,viewer); 358 } 359 else if (vtype == BINARY_FILE_VIEWER) { 360 return MatView_SeqBAIJ_Binary(A,viewer); 361 } 362 else if (vtype == DRAW_VIEWER) { 363 return MatView_SeqBAIJ_Draw(A,viewer); 364 } 365 return 0; 366 } 367 368 #define CHUNKSIZE 10 369 370 #undef __FUNC__ 371 #define __FUNC__ "MatSetValues_SeqBAIJ" 372 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 373 { 374 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 375 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 376 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 377 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 378 int ridx,cidx,bs2=a->bs2; 379 Scalar *ap,value,*aa=a->a,*bap; 380 381 for ( k=0; k<m; k++ ) { /* loop over added rows */ 382 row = im[k]; brow = row/bs; 383 #if defined(PETSC_BOPT_g) 384 if (row < 0) SETERRQ(1,0,"Negative row"); 385 if (row >= a->m) SETERRQ(1,0,"Row too large"); 386 #endif 387 rp = aj + ai[brow]; 388 ap = aa + bs2*ai[brow]; 389 rmax = imax[brow]; 390 nrow = ailen[brow]; 391 low = 0; 392 for ( l=0; l<n; l++ ) { /* loop over added columns */ 393 #if defined(PETSC_BOPT_g) 394 if (in[l] < 0) SETERRQ(1,0,"Negative column"); 395 if (in[l] >= a->n) SETERRQ(1,0,"Column too large"); 396 #endif 397 col = in[l]; bcol = col/bs; 398 ridx = row % bs; cidx = col % bs; 399 if (roworiented) { 400 value = *v++; 401 } 402 else { 403 value = v[k + l*m]; 404 } 405 if (!sorted) low = 0; high = nrow; 406 while (high-low > 7) { 407 t = (low+high)/2; 408 if (rp[t] > bcol) high = t; 409 else low = t; 410 } 411 for ( i=low; i<high; i++ ) { 412 if (rp[i] > bcol) break; 413 if (rp[i] == bcol) { 414 bap = ap + bs2*i + bs*cidx + ridx; 415 if (is == ADD_VALUES) *bap += value; 416 else *bap = value; 417 goto noinsert; 418 } 419 } 420 if (nonew) goto noinsert; 421 if (nrow >= rmax) { 422 /* there is no extra room in row, therefore enlarge */ 423 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 424 Scalar *new_a; 425 426 /* malloc new storage space */ 427 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 428 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 429 new_j = (int *) (new_a + bs2*new_nz); 430 new_i = new_j + new_nz; 431 432 /* copy over old data into new slots */ 433 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 434 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 435 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); 436 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 437 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, 438 len*sizeof(int)); 439 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); 440 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 441 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), 442 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); 443 /* free up old matrix storage */ 444 PetscFree(a->a); 445 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 446 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 447 a->singlemalloc = 1; 448 449 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 450 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 451 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 452 a->maxnz += bs2*CHUNKSIZE; 453 a->reallocs++; 454 a->nz++; 455 } 456 N = nrow++ - 1; 457 /* shift up all the later entries in this row */ 458 for ( ii=N; ii>=i; ii-- ) { 459 rp[ii+1] = rp[ii]; 460 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 461 } 462 if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 463 rp[i] = bcol; 464 ap[bs2*i + bs*cidx + ridx] = value; 465 noinsert:; 466 low = i; 467 } 468 ailen[brow] = nrow; 469 } 470 return 0; 471 } 472 473 #undef __FUNC__ 474 #define __FUNC__ "MatSetValues_SeqBAIJ" 475 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 476 { 477 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 478 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 479 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 480 int *aj=a->j,nonew=a->nonew; 481 int bs2=a->bs2,bs=a->bs; 482 Scalar *ap,*value,*aa=a->a,*bap; 483 484 for ( k=0; k<m; k++ ) { /* loop over added rows */ 485 row = im[k]; 486 #if defined(PETSC_BOPT_g) 487 if (row < 0) SETERRQ(1,0,"Negative row"); 488 if (row >= a->mbs) SETERRQ(1,0,"Row too large"); 489 #endif 490 rp = aj + ai[row]; 491 ap = aa + bs2*ai[row]; 492 rmax = imax[row]; 493 nrow = ailen[row]; 494 low = 0; 495 for ( l=0; l<n; l++ ) { /* loop over added columns */ 496 #if defined(PETSC_BOPT_g) 497 if (in[l] < 0) SETERRQ(1,0,"Negative column"); 498 if (in[l] >= a->nbs) SETERRQ(1,0,"Column too large"); 499 #endif 500 col = in[l]; 501 if (roworiented) { 502 value = v; v+=bs2; 503 } 504 else { 505 value = v+(k + l*m)*bs2; 506 } 507 if (!sorted) low = 0; high = nrow; 508 while (high-low > 7) { 509 t = (low+high)/2; 510 if (rp[t] > col) high = t; 511 else low = t; 512 } 513 for ( i=low; i<high; i++ ) { 514 if (rp[i] > col) break; 515 if (rp[i] == col) { 516 bap = ap + bs2*i; 517 if (is == ADD_VALUES) { 518 for ( ii=0; ii<bs; ii++) 519 for (jj=ii; jj<bs2; jj+=bs ) 520 bap[jj] += *value++; 521 } 522 else { 523 for ( ii=0; ii<bs; ii++) 524 for (jj=ii; jj<bs2; jj+=bs ) 525 bap[jj] += *value++; 526 } 527 goto noinsert; 528 } 529 } 530 if (nonew) goto noinsert; 531 if (nrow >= rmax) { 532 /* there is no extra room in row, therefore enlarge */ 533 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 534 Scalar *new_a; 535 536 /* malloc new storage space */ 537 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 538 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 539 new_j = (int *) (new_a + bs2*new_nz); 540 new_i = new_j + new_nz; 541 542 /* copy over old data into new slots */ 543 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} 544 for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 545 PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int)); 546 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 547 PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow, 548 len*sizeof(int)); 549 PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(Scalar)); 550 PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 551 PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE), 552 aa+bs2*(ai[row]+nrow),bs2*len*sizeof(Scalar)); 553 /* free up old matrix storage */ 554 PetscFree(a->a); 555 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 556 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 557 a->singlemalloc = 1; 558 559 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 560 rmax = imax[row] = imax[row] + CHUNKSIZE; 561 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 562 a->maxnz += bs2*CHUNKSIZE; 563 a->reallocs++; 564 a->nz++; 565 } 566 N = nrow++ - 1; 567 /* shift up all the later entries in this row */ 568 for ( ii=N; ii>=i; ii-- ) { 569 rp[ii+1] = rp[ii]; 570 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 571 } 572 if (N >= i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 573 rp[i] = col; 574 bap = ap + bs2*i; 575 for ( ii=0; ii<bs; ii++) 576 for (jj=ii; jj<bs2; jj+=bs ) 577 bap[jj] += *value++; 578 noinsert:; 579 low = i; 580 } 581 ailen[row] = nrow; 582 } 583 return 0; 584 } 585 586 #undef __FUNC__ 587 #define __FUNC__ "MatGetSize_SeqBAIJ" 588 static int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 589 { 590 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 591 *m = a->m; *n = a->n; 592 return 0; 593 } 594 595 #undef __FUNC__ 596 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ" 597 static int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 598 { 599 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 600 *m = 0; *n = a->m; 601 return 0; 602 } 603 604 #undef __FUNC__ 605 #define __FUNC__ "MatGetRow_SeqBAIJ" 606 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 607 { 608 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 609 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 610 Scalar *aa,*v_i,*aa_i; 611 612 bs = a->bs; 613 ai = a->i; 614 aj = a->j; 615 aa = a->a; 616 bs2 = a->bs2; 617 618 if (row < 0 || row >= a->m) SETERRQ(1,0,"Row out of range"); 619 620 bn = row/bs; /* Block number */ 621 bp = row % bs; /* Block Position */ 622 M = ai[bn+1] - ai[bn]; 623 *nz = bs*M; 624 625 if (v) { 626 *v = 0; 627 if (*nz) { 628 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 629 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 630 v_i = *v + i*bs; 631 aa_i = aa + bs2*(ai[bn] + i); 632 for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 633 } 634 } 635 } 636 637 if (idx) { 638 *idx = 0; 639 if (*nz) { 640 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 641 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 642 idx_i = *idx + i*bs; 643 itmp = bs*aj[ai[bn] + i]; 644 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 645 } 646 } 647 } 648 return 0; 649 } 650 651 #undef __FUNC__ 652 #define __FUNC__ "MatRestoreRow_SeqBAIJ" 653 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 654 { 655 if (idx) {if (*idx) PetscFree(*idx);} 656 if (v) {if (*v) PetscFree(*v);} 657 return 0; 658 } 659 660 #undef __FUNC__ 661 #define __FUNC__ "MatTranspose_SeqBAIJ" 662 static int MatTranspose_SeqBAIJ(Mat A,Mat *B) 663 { 664 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 665 Mat C; 666 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 667 int *rows,*cols,bs2=a->bs2; 668 Scalar *array=a->a; 669 670 if (B==PETSC_NULL && mbs!=nbs) 671 SETERRQ(1,0,"Square matrix only for in-place"); 672 col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 673 PetscMemzero(col,(1+nbs)*sizeof(int)); 674 675 for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 676 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 677 PetscFree(col); 678 rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 679 cols = rows + bs; 680 for ( i=0; i<mbs; i++ ) { 681 cols[0] = i*bs; 682 for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 683 len = ai[i+1] - ai[i]; 684 for ( j=0; j<len; j++ ) { 685 rows[0] = (*aj++)*bs; 686 for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 687 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 688 array += bs2; 689 } 690 } 691 PetscFree(rows); 692 693 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 694 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 695 696 if (B != PETSC_NULL) { 697 *B = C; 698 } else { 699 /* This isn't really an in-place transpose */ 700 PetscFree(a->a); 701 if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 702 if (a->diag) PetscFree(a->diag); 703 if (a->ilen) PetscFree(a->ilen); 704 if (a->imax) PetscFree(a->imax); 705 if (a->solve_work) PetscFree(a->solve_work); 706 PetscFree(a); 707 PetscMemcpy(A,C,sizeof(struct _Mat)); 708 PetscHeaderDestroy(C); 709 } 710 return 0; 711 } 712 713 714 #undef __FUNC__ 715 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ" 716 static int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 717 { 718 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 719 int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 720 int m = a->m,*ip, N, *ailen = a->ilen; 721 int mbs = a->mbs, bs2 = a->bs2,rmax; 722 Scalar *aa = a->a, *ap; 723 724 if (mode == MAT_FLUSH_ASSEMBLY) return 0; 725 726 rmax = ailen[0]; 727 for ( i=1; i<mbs; i++ ) { 728 /* move each row back by the amount of empty slots (fshift) before it*/ 729 fshift += imax[i-1] - ailen[i-1]; 730 rmax = PetscMax(rmax,ailen[i]); 731 if (fshift) { 732 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 733 N = ailen[i]; 734 for ( j=0; j<N; j++ ) { 735 ip[j-fshift] = ip[j]; 736 PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 737 } 738 } 739 ai[i] = ai[i-1] + ailen[i-1]; 740 } 741 if (mbs) { 742 fshift += imax[mbs-1] - ailen[mbs-1]; 743 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 744 } 745 /* reset ilen and imax for each row */ 746 for ( i=0; i<mbs; i++ ) { 747 ailen[i] = imax[i] = ai[i+1] - ai[i]; 748 } 749 a->nz = ai[mbs]; 750 751 /* diagonals may have moved, so kill the diagonal pointers */ 752 if (fshift && a->diag) { 753 PetscFree(a->diag); 754 PLogObjectMemory(A,-(m+1)*sizeof(int)); 755 a->diag = 0; 756 } 757 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 758 m,a->n,a->bs,fshift*bs2,a->nz*bs2); 759 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 760 a->reallocs); 761 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 762 a->reallocs = 0; 763 A->info.nz_unneeded = (double)fshift*bs2; 764 765 return 0; 766 } 767 768 #undef __FUNC__ 769 #define __FUNC__ "MatZeroEntries_SeqBAIJ" 770 static int MatZeroEntries_SeqBAIJ(Mat A) 771 { 772 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 773 PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar)); 774 return 0; 775 } 776 777 #undef __FUNC__ 778 #define __FUNC__ "MatDestroy_SeqBAIJ" 779 int MatDestroy_SeqBAIJ(PetscObject obj) 780 { 781 Mat A = (Mat) obj; 782 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 783 int ierr; 784 785 #if defined(PETSC_LOG) 786 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 787 #endif 788 PetscFree(a->a); 789 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 790 if (a->diag) PetscFree(a->diag); 791 if (a->ilen) PetscFree(a->ilen); 792 if (a->imax) PetscFree(a->imax); 793 if (a->solve_work) PetscFree(a->solve_work); 794 if (a->mult_work) PetscFree(a->mult_work); 795 PetscFree(a); 796 if (A->mapping) { 797 ierr = ISLocalToGlobalMappingDestroy(A->mapping); CHKERRQ(ierr); 798 } 799 PLogObjectDestroy(A); 800 PetscHeaderDestroy(A); 801 return 0; 802 } 803 804 #undef __FUNC__ 805 #define __FUNC__ "MatSetOption_SeqBAIJ" 806 static int MatSetOption_SeqBAIJ(Mat A,MatOption op) 807 { 808 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 809 if (op == MAT_ROW_ORIENTED) a->roworiented = 1; 810 else if (op == MAT_COLUMN_ORIENTED) a->roworiented = 0; 811 else if (op == MAT_COLUMNS_SORTED) a->sorted = 1; 812 else if (op == MAT_COLUMNS_UNSORTED) a->sorted = 0; 813 else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 814 else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 815 else if (op == MAT_ROWS_SORTED || 816 op == MAT_ROWS_UNSORTED || 817 op == MAT_SYMMETRIC || 818 op == MAT_STRUCTURALLY_SYMMETRIC || 819 op == MAT_YES_NEW_DIAGONALS || 820 op == MAT_IGNORE_OFF_PROCESSOR_ENTRIES) 821 PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 822 else if (op == MAT_NO_NEW_DIAGONALS) 823 {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");} 824 else 825 {SETERRQ(PETSC_ERR_SUP,0,"unknown option");} 826 return 0; 827 } 828 829 830 /* -------------------------------------------------------*/ 831 /* Should check that shapes of vectors and matrices match */ 832 /* -------------------------------------------------------*/ 833 #include "pinclude/plapack.h" 834 835 #undef __FUNC__ 836 #define __FUNC__ "MatMult_SeqBAIJ_1" 837 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz) 838 { 839 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 840 register Scalar *x,*z,*v,sum; 841 int mbs=a->mbs,i,*idx,*ii,n; 842 843 VecGetArray_Fast(xx,x); 844 VecGetArray_Fast(zz,z); 845 846 idx = a->j; 847 v = a->a; 848 ii = a->i; 849 850 for ( i=0; i<mbs; i++ ) { 851 n = ii[1] - ii[0]; ii++; 852 sum = 0.0; 853 while (n--) sum += *v++ * x[*idx++]; 854 z[i] = sum; 855 } 856 VecRestoreArray_Fast(xx,x); 857 VecRestoreArray_Fast(zz,z); 858 PLogFlops(2*a->nz - a->m); 859 return 0; 860 } 861 862 #undef __FUNC__ 863 #define __FUNC__ "MatMult_SeqBAIJ_2" 864 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz) 865 { 866 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 867 register Scalar *x,*z,*v,*xb,sum1,sum2; 868 register Scalar x1,x2; 869 int mbs=a->mbs,i,*idx,*ii; 870 int j,n; 871 872 VecGetArray_Fast(xx,x); 873 VecGetArray_Fast(zz,z); 874 875 idx = a->j; 876 v = a->a; 877 ii = a->i; 878 879 for ( i=0; i<mbs; i++ ) { 880 n = ii[1] - ii[0]; ii++; 881 sum1 = 0.0; sum2 = 0.0; 882 for ( j=0; j<n; j++ ) { 883 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 884 sum1 += v[0]*x1 + v[2]*x2; 885 sum2 += v[1]*x1 + v[3]*x2; 886 v += 4; 887 } 888 z[0] = sum1; z[1] = sum2; 889 z += 2; 890 } 891 VecRestoreArray_Fast(xx,x); 892 VecRestoreArray_Fast(zz,z); 893 PLogFlops(4*a->nz - a->m); 894 return 0; 895 } 896 897 #undef __FUNC__ 898 #define __FUNC__ "MatMult_SeqBAIJ_3" 899 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz) 900 { 901 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 902 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 903 int mbs=a->mbs,i,*idx,*ii,j,n; 904 905 VecGetArray_Fast(xx,x); 906 VecGetArray_Fast(zz,z); 907 908 idx = a->j; 909 v = a->a; 910 ii = a->i; 911 912 for ( i=0; i<mbs; i++ ) { 913 n = ii[1] - ii[0]; ii++; 914 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 915 for ( j=0; j<n; j++ ) { 916 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 917 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 918 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 919 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 920 v += 9; 921 } 922 z[0] = sum1; z[1] = sum2; z[2] = sum3; 923 z += 3; 924 } 925 VecRestoreArray_Fast(xx,x); 926 VecRestoreArray_Fast(zz,z); 927 PLogFlops(18*a->nz - a->m); 928 return 0; 929 } 930 931 #undef __FUNC__ 932 #define __FUNC__ "MatMult_SeqBAIJ_4" 933 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz) 934 { 935 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 936 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4; 937 register Scalar x1,x2,x3,x4; 938 int mbs=a->mbs,i,*idx,*ii; 939 int j,n; 940 941 VecGetArray_Fast(xx,x); 942 VecGetArray_Fast(zz,z); 943 944 idx = a->j; 945 v = a->a; 946 ii = a->i; 947 948 for ( i=0; i<mbs; i++ ) { 949 n = ii[1] - ii[0]; ii++; 950 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 951 for ( j=0; j<n; j++ ) { 952 xb = x + 4*(*idx++); 953 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 954 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 955 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 956 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 957 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 958 v += 16; 959 } 960 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 961 z += 4; 962 } 963 VecRestoreArray_Fast(xx,x); 964 VecRestoreArray_Fast(zz,z); 965 PLogFlops(32*a->nz - a->m); 966 return 0; 967 } 968 969 #undef __FUNC__ 970 #define __FUNC__ "MatMult_SeqBAIJ_5" 971 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz) 972 { 973 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 974 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 975 register Scalar x1,x2,x3,x4,x5; 976 int mbs=a->mbs,i,*idx,*ii,j,n; 977 978 VecGetArray_Fast(xx,x); 979 VecGetArray_Fast(zz,z); 980 981 idx = a->j; 982 v = a->a; 983 ii = a->i; 984 985 for ( i=0; i<mbs; i++ ) { 986 n = ii[1] - ii[0]; ii++; 987 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 988 for ( j=0; j<n; j++ ) { 989 xb = x + 5*(*idx++); 990 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 991 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 992 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 993 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 994 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 995 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 996 v += 25; 997 } 998 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 999 z += 5; 1000 } 1001 VecRestoreArray_Fast(xx,x); 1002 VecRestoreArray_Fast(zz,z); 1003 PLogFlops(50*a->nz - a->m); 1004 return 0; 1005 } 1006 1007 #undef __FUNC__ 1008 #define __FUNC__ "MatMult_SeqBAIJ_7" 1009 static int MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz) 1010 { 1011 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1012 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 1013 register Scalar x1,x2,x3,x4,x5,x6,x7; 1014 int mbs=a->mbs,i,*idx,*ii,j,n; 1015 1016 VecGetArray_Fast(xx,x); 1017 VecGetArray_Fast(zz,z); 1018 1019 idx = a->j; 1020 v = a->a; 1021 ii = a->i; 1022 1023 for ( i=0; i<mbs; i++ ) { 1024 n = ii[1] - ii[0]; ii++; 1025 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0; 1026 for ( j=0; j<n; j++ ) { 1027 xb = x + 7*(*idx++); 1028 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 1029 sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 1030 sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 1031 sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 1032 sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 1033 sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 1034 sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 1035 sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 1036 v += 49; 1037 } 1038 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 1039 z += 7; 1040 } 1041 1042 VecRestoreArray_Fast(xx,x); 1043 VecRestoreArray_Fast(zz,z); 1044 PLogFlops(98*a->nz - a->m); 1045 return 0; 1046 } 1047 1048 #undef __FUNC__ 1049 #define __FUNC__ "MatMult_SeqBAIJ_N" 1050 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz) 1051 { 1052 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1053 register Scalar *x,*z,*v,*xb; 1054 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2; 1055 int _One = 1,ncols,k; 1056 Scalar _DOne = 1.0,*work,*workt,_DZero = 0.0; 1057 1058 VecGetArray_Fast(xx,x); 1059 VecGetArray_Fast(zz,z); 1060 1061 idx = a->j; 1062 v = a->a; 1063 ii = a->i; 1064 1065 1066 if (!a->mult_work) { 1067 k = PetscMax(a->m,a->n); 1068 a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work); 1069 } 1070 work = a->mult_work; 1071 for ( i=0; i<mbs; i++ ) { 1072 n = ii[1] - ii[0]; ii++; 1073 ncols = n*bs; 1074 workt = work; 1075 for ( j=0; j<n; j++ ) { 1076 xb = x + bs*(*idx++); 1077 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1078 workt += bs; 1079 } 1080 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); 1081 v += n*bs2; 1082 z += bs; 1083 } 1084 VecRestoreArray_Fast(xx,x); 1085 VecRestoreArray_Fast(zz,z); 1086 PLogFlops(2*a->nz*bs2 - a->m); 1087 return 0; 1088 } 1089 1090 #undef __FUNC__ 1091 #define __FUNC__ "MatMultAdd_SeqBAIJ_1" 1092 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 1093 { 1094 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1095 register Scalar *x,*y,*z,*v,sum; 1096 int mbs=a->mbs,i,*idx,*ii,n; 1097 1098 VecGetArray_Fast(xx,x); 1099 VecGetArray_Fast(yy,y); 1100 VecGetArray_Fast(zz,z); 1101 1102 idx = a->j; 1103 v = a->a; 1104 ii = a->i; 1105 1106 for ( i=0; i<mbs; i++ ) { 1107 n = ii[1] - ii[0]; ii++; 1108 sum = y[i]; 1109 while (n--) sum += *v++ * x[*idx++]; 1110 z[i] = sum; 1111 } 1112 VecRestoreArray_Fast(xx,x); 1113 VecRestoreArray_Fast(yy,y); 1114 VecRestoreArray_Fast(zz,z); 1115 PLogFlops(2*a->nz); 1116 return 0; 1117 } 1118 1119 #undef __FUNC__ 1120 #define __FUNC__ "MatMultAdd_SeqBAIJ_2" 1121 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 1122 { 1123 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1124 register Scalar *x,*y,*z,*v,*xb,sum1,sum2; 1125 register Scalar x1,x2; 1126 int mbs=a->mbs,i,*idx,*ii; 1127 int j,n; 1128 1129 VecGetArray_Fast(xx,x); 1130 VecGetArray_Fast(yy,y); 1131 VecGetArray_Fast(zz,z); 1132 1133 idx = a->j; 1134 v = a->a; 1135 ii = a->i; 1136 1137 for ( i=0; i<mbs; i++ ) { 1138 n = ii[1] - ii[0]; ii++; 1139 sum1 = y[0]; sum2 = y[1]; 1140 for ( j=0; j<n; j++ ) { 1141 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 1142 sum1 += v[0]*x1 + v[2]*x2; 1143 sum2 += v[1]*x1 + v[3]*x2; 1144 v += 4; 1145 } 1146 z[0] = sum1; z[1] = sum2; 1147 z += 2; y += 2; 1148 } 1149 VecRestoreArray_Fast(xx,x); 1150 VecRestoreArray_Fast(yy,y); 1151 VecRestoreArray_Fast(zz,z); 1152 PLogFlops(4*a->nz); 1153 return 0; 1154 } 1155 1156 #undef __FUNC__ 1157 #define __FUNC__ "MatMultAdd_SeqBAIJ_3" 1158 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 1159 { 1160 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1161 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 1162 int mbs=a->mbs,i,*idx,*ii,j,n; 1163 1164 VecGetArray_Fast(xx,x); 1165 VecGetArray_Fast(yy,y); 1166 VecGetArray_Fast(zz,z); 1167 1168 idx = a->j; 1169 v = a->a; 1170 ii = a->i; 1171 1172 for ( i=0; i<mbs; i++ ) { 1173 n = ii[1] - ii[0]; ii++; 1174 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; 1175 for ( j=0; j<n; j++ ) { 1176 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1177 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 1178 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 1179 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 1180 v += 9; 1181 } 1182 z[0] = sum1; z[1] = sum2; z[2] = sum3; 1183 z += 3; y += 3; 1184 } 1185 VecRestoreArray_Fast(xx,x); 1186 VecRestoreArray_Fast(yy,y); 1187 VecRestoreArray_Fast(zz,z); 1188 PLogFlops(18*a->nz); 1189 return 0; 1190 } 1191 1192 #undef __FUNC__ 1193 #define __FUNC__ "MatMultAdd_SeqBAIJ_4" 1194 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 1195 { 1196 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1197 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4; 1198 register Scalar x1,x2,x3,x4; 1199 int mbs=a->mbs,i,*idx,*ii; 1200 int j,n; 1201 1202 VecGetArray_Fast(xx,x); 1203 VecGetArray_Fast(yy,y); 1204 VecGetArray_Fast(zz,z); 1205 1206 idx = a->j; 1207 v = a->a; 1208 ii = a->i; 1209 1210 for ( i=0; i<mbs; i++ ) { 1211 n = ii[1] - ii[0]; ii++; 1212 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; 1213 for ( j=0; j<n; j++ ) { 1214 xb = x + 4*(*idx++); 1215 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1216 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 1217 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 1218 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 1219 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 1220 v += 16; 1221 } 1222 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 1223 z += 4; y += 4; 1224 } 1225 VecRestoreArray_Fast(xx,x); 1226 VecRestoreArray_Fast(yy,y); 1227 VecRestoreArray_Fast(zz,z); 1228 PLogFlops(32*a->nz); 1229 return 0; 1230 } 1231 1232 #undef __FUNC__ 1233 #define __FUNC__ "MatMultAdd_SeqBAIJ_5" 1234 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 1235 { 1236 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1237 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 1238 register Scalar x1,x2,x3,x4,x5; 1239 int mbs=a->mbs,i,*idx,*ii,j,n; 1240 1241 VecGetArray_Fast(xx,x); 1242 VecGetArray_Fast(yy,y); 1243 VecGetArray_Fast(zz,z); 1244 1245 idx = a->j; 1246 v = a->a; 1247 ii = a->i; 1248 1249 for ( i=0; i<mbs; i++ ) { 1250 n = ii[1] - ii[0]; ii++; 1251 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; 1252 for ( j=0; j<n; j++ ) { 1253 xb = x + 5*(*idx++); 1254 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 1255 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1256 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1257 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1258 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1259 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 1260 v += 25; 1261 } 1262 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 1263 z += 5; y += 5; 1264 } 1265 VecRestoreArray_Fast(xx,x); 1266 VecRestoreArray_Fast(yy,y); 1267 VecRestoreArray_Fast(zz,z); 1268 PLogFlops(50*a->nz); 1269 return 0; 1270 } 1271 1272 #undef __FUNC__ 1273 #define __FUNC__ "MatMultAdd_SeqBAIJ_7" 1274 static int MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1275 { 1276 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1277 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 1278 register Scalar x1,x2,x3,x4,x5,x6,x7; 1279 int mbs=a->mbs,i,*idx,*ii,j,n; 1280 1281 VecGetArray_Fast(xx,x); 1282 VecGetArray_Fast(yy,y); 1283 VecGetArray_Fast(zz,z); 1284 1285 idx = a->j; 1286 v = a->a; 1287 ii = a->i; 1288 1289 for ( i=0; i<mbs; i++ ) { 1290 n = ii[1] - ii[0]; ii++; 1291 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6]; 1292 for ( j=0; j<n; j++ ) { 1293 xb = x + 7*(*idx++); 1294 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 1295 sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 1296 sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 1297 sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 1298 sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 1299 sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 1300 sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 1301 sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 1302 v += 49; 1303 } 1304 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 1305 z += 7; y += 7; 1306 } 1307 VecRestoreArray_Fast(xx,x); 1308 VecRestoreArray_Fast(yy,y); 1309 VecRestoreArray_Fast(zz,z); 1310 PLogFlops(98*a->nz); 1311 return 0; 1312 } 1313 1314 1315 #undef __FUNC__ 1316 #define __FUNC__ "MatMultAdd_SeqBAIJ_N" 1317 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 1318 { 1319 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1320 register Scalar *x,*z,*v,*xb; 1321 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr; 1322 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1323 1324 if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1325 1326 VecGetArray_Fast(xx,x); 1327 VecGetArray_Fast(zz,z); 1328 1329 idx = a->j; 1330 v = a->a; 1331 ii = a->i; 1332 1333 1334 if (!a->mult_work) { 1335 k = PetscMax(a->m,a->n); 1336 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work); 1337 } 1338 work = a->mult_work; 1339 for ( i=0; i<mbs; i++ ) { 1340 n = ii[1] - ii[0]; ii++; 1341 ncols = n*bs; 1342 workt = work; 1343 for ( j=0; j<n; j++ ) { 1344 xb = x + bs*(*idx++); 1345 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1346 workt += bs; 1347 } 1348 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); 1349 v += n*bs2; 1350 z += bs; 1351 } 1352 VecRestoreArray_Fast(xx,x); 1353 VecRestoreArray_Fast(zz,z); 1354 PLogFlops(2*a->nz*bs2 ); 1355 return 0; 1356 } 1357 1358 #undef __FUNC__ 1359 #define __FUNC__ "MatMultTrans_SeqBAIJ" 1360 static int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz) 1361 { 1362 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1363 Scalar *xg,*zg,*zb; 1364 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1365 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1366 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1367 1368 1369 VecGetArray_Fast(xx,xg); x = xg; 1370 VecGetArray_Fast(zz,zg); z = zg; 1371 PetscMemzero(z,N*sizeof(Scalar)); 1372 1373 idx = a->j; 1374 v = a->a; 1375 ii = a->i; 1376 1377 switch (bs) { 1378 case 1: 1379 for ( i=0; i<mbs; i++ ) { 1380 n = ii[1] - ii[0]; ii++; 1381 xb = x + i; x1 = xb[0]; 1382 ib = idx + ai[i]; 1383 for ( j=0; j<n; j++ ) { 1384 rval = ib[j]; 1385 z[rval] += *v++ * x1; 1386 } 1387 } 1388 break; 1389 case 2: 1390 for ( i=0; i<mbs; i++ ) { 1391 n = ii[1] - ii[0]; ii++; 1392 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1393 ib = idx + ai[i]; 1394 for ( j=0; j<n; j++ ) { 1395 rval = ib[j]*2; 1396 z[rval++] += v[0]*x1 + v[1]*x2; 1397 z[rval++] += v[2]*x1 + v[3]*x2; 1398 v += 4; 1399 } 1400 } 1401 break; 1402 case 3: 1403 for ( i=0; i<mbs; i++ ) { 1404 n = ii[1] - ii[0]; ii++; 1405 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1406 ib = idx + ai[i]; 1407 for ( j=0; j<n; j++ ) { 1408 rval = ib[j]*3; 1409 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1410 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1411 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1412 v += 9; 1413 } 1414 } 1415 break; 1416 case 4: 1417 for ( i=0; i<mbs; i++ ) { 1418 n = ii[1] - ii[0]; ii++; 1419 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1420 ib = idx + ai[i]; 1421 for ( j=0; j<n; j++ ) { 1422 rval = ib[j]*4; 1423 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1424 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1425 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1426 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1427 v += 16; 1428 } 1429 } 1430 break; 1431 case 5: 1432 for ( i=0; i<mbs; i++ ) { 1433 n = ii[1] - ii[0]; ii++; 1434 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1435 x4 = xb[3]; x5 = xb[4]; 1436 ib = idx + ai[i]; 1437 for ( j=0; j<n; j++ ) { 1438 rval = ib[j]*5; 1439 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1440 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1441 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1442 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1443 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1444 v += 25; 1445 } 1446 } 1447 break; 1448 /* block sizes larger then 5 by 5 are handled by BLAS */ 1449 default: { 1450 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1451 if (!a->mult_work) { 1452 k = PetscMax(a->m,a->n); 1453 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1454 CHKPTRQ(a->mult_work); 1455 } 1456 work = a->mult_work; 1457 for ( i=0; i<mbs; i++ ) { 1458 n = ii[1] - ii[0]; ii++; 1459 ncols = n*bs; 1460 PetscMemzero(work,ncols*sizeof(Scalar)); 1461 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1462 v += n*bs2; 1463 x += bs; 1464 workt = work; 1465 for ( j=0; j<n; j++ ) { 1466 zb = z + bs*(*idx++); 1467 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1468 workt += bs; 1469 } 1470 } 1471 } 1472 } 1473 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1474 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1475 PLogFlops(2*a->nz*a->bs2 - a->n); 1476 return 0; 1477 } 1478 1479 #undef __FUNC__ 1480 #define __FUNC__ "MatMultTransAdd_SeqBAIJ" 1481 static int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1482 { 1483 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1484 Scalar *xg,*zg,*zb; 1485 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1486 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1487 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1488 1489 1490 1491 VecGetArray_Fast(xx,xg); x = xg; 1492 VecGetArray_Fast(zz,zg); z = zg; 1493 1494 if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1495 else PetscMemzero(z,N*sizeof(Scalar)); 1496 1497 1498 idx = a->j; 1499 v = a->a; 1500 ii = a->i; 1501 1502 switch (bs) { 1503 case 1: 1504 for ( i=0; i<mbs; i++ ) { 1505 n = ii[1] - ii[0]; ii++; 1506 xb = x + i; x1 = xb[0]; 1507 ib = idx + ai[i]; 1508 for ( j=0; j<n; j++ ) { 1509 rval = ib[j]; 1510 z[rval] += *v++ * x1; 1511 } 1512 } 1513 break; 1514 case 2: 1515 for ( i=0; i<mbs; i++ ) { 1516 n = ii[1] - ii[0]; ii++; 1517 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1518 ib = idx + ai[i]; 1519 for ( j=0; j<n; j++ ) { 1520 rval = ib[j]*2; 1521 z[rval++] += v[0]*x1 + v[1]*x2; 1522 z[rval++] += v[2]*x1 + v[3]*x2; 1523 v += 4; 1524 } 1525 } 1526 break; 1527 case 3: 1528 for ( i=0; i<mbs; i++ ) { 1529 n = ii[1] - ii[0]; ii++; 1530 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1531 ib = idx + ai[i]; 1532 for ( j=0; j<n; j++ ) { 1533 rval = ib[j]*3; 1534 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1535 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1536 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1537 v += 9; 1538 } 1539 } 1540 break; 1541 case 4: 1542 for ( i=0; i<mbs; i++ ) { 1543 n = ii[1] - ii[0]; ii++; 1544 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1545 ib = idx + ai[i]; 1546 for ( j=0; j<n; j++ ) { 1547 rval = ib[j]*4; 1548 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1549 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1550 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1551 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1552 v += 16; 1553 } 1554 } 1555 break; 1556 case 5: 1557 for ( i=0; i<mbs; i++ ) { 1558 n = ii[1] - ii[0]; ii++; 1559 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1560 x4 = xb[3]; x5 = xb[4]; 1561 ib = idx + ai[i]; 1562 for ( j=0; j<n; j++ ) { 1563 rval = ib[j]*5; 1564 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1565 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1566 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1567 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1568 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1569 v += 25; 1570 } 1571 } 1572 break; 1573 /* block sizes larger then 5 by 5 are handled by BLAS */ 1574 default: { 1575 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1576 if (!a->mult_work) { 1577 k = PetscMax(a->m,a->n); 1578 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1579 CHKPTRQ(a->mult_work); 1580 } 1581 work = a->mult_work; 1582 for ( i=0; i<mbs; i++ ) { 1583 n = ii[1] - ii[0]; ii++; 1584 ncols = n*bs; 1585 PetscMemzero(work,ncols*sizeof(Scalar)); 1586 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1587 v += n*bs2; 1588 x += bs; 1589 workt = work; 1590 for ( j=0; j<n; j++ ) { 1591 zb = z + bs*(*idx++); 1592 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1593 workt += bs; 1594 } 1595 } 1596 } 1597 } 1598 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1599 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1600 PLogFlops(2*a->nz*a->bs2); 1601 return 0; 1602 } 1603 1604 #undef __FUNC__ 1605 #define __FUNC__ "MatGetInfo_SeqBAIJ" 1606 static int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info) 1607 { 1608 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1609 1610 info->rows_global = (double)a->m; 1611 info->columns_global = (double)a->n; 1612 info->rows_local = (double)a->m; 1613 info->columns_local = (double)a->n; 1614 info->block_size = a->bs2; 1615 info->nz_allocated = a->maxnz; 1616 info->nz_used = a->bs2*a->nz; 1617 info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 1618 /* if (info->nz_unneeded != A->info.nz_unneeded) 1619 printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */ 1620 info->assemblies = A->num_ass; 1621 info->mallocs = a->reallocs; 1622 info->memory = A->mem; 1623 if (A->factor) { 1624 info->fill_ratio_given = A->info.fill_ratio_given; 1625 info->fill_ratio_needed = A->info.fill_ratio_needed; 1626 info->factor_mallocs = A->info.factor_mallocs; 1627 } else { 1628 info->fill_ratio_given = 0; 1629 info->fill_ratio_needed = 0; 1630 info->factor_mallocs = 0; 1631 } 1632 return 0; 1633 } 1634 1635 #undef __FUNC__ 1636 #define __FUNC__ "MatEqual_SeqBAIJ" 1637 static int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 1638 { 1639 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 1640 1641 if (B->type !=MATSEQBAIJ)SETERRQ(1,0,"Matrices must be same type"); 1642 1643 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1644 if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 1645 (a->nz != b->nz)) { 1646 *flg = PETSC_FALSE; return 0; 1647 } 1648 1649 /* if the a->i are the same */ 1650 if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 1651 *flg = PETSC_FALSE; return 0; 1652 } 1653 1654 /* if a->j are the same */ 1655 if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 1656 *flg = PETSC_FALSE; return 0; 1657 } 1658 1659 /* if a->a are the same */ 1660 if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 1661 *flg = PETSC_FALSE; return 0; 1662 } 1663 *flg = PETSC_TRUE; 1664 return 0; 1665 1666 } 1667 1668 #undef __FUNC__ 1669 #define __FUNC__ "MatGetDiagonal_SeqBAIJ" 1670 static int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 1671 { 1672 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1673 int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 1674 Scalar *x, zero = 0.0,*aa,*aa_j; 1675 1676 if (A->factor) SETERRQ(1,0,"Not for factored matrix"); 1677 bs = a->bs; 1678 aa = a->a; 1679 ai = a->i; 1680 aj = a->j; 1681 ambs = a->mbs; 1682 bs2 = a->bs2; 1683 1684 VecSet(&zero,v); 1685 VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n); 1686 if (n != a->m) SETERRQ(1,0,"Nonconforming matrix and vector"); 1687 for ( i=0; i<ambs; i++ ) { 1688 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1689 if (aj[j] == i) { 1690 row = i*bs; 1691 aa_j = aa+j*bs2; 1692 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 1693 break; 1694 } 1695 } 1696 } 1697 return 0; 1698 } 1699 1700 #undef __FUNC__ 1701 #define __FUNC__ "MatDiagonalScale_SeqBAIJ" 1702 static int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 1703 { 1704 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1705 Scalar *l,*r,x,*v,*aa,*li,*ri; 1706 int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 1707 1708 ai = a->i; 1709 aj = a->j; 1710 aa = a->a; 1711 m = a->m; 1712 n = a->n; 1713 bs = a->bs; 1714 mbs = a->mbs; 1715 bs2 = a->bs2; 1716 if (ll) { 1717 VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm); 1718 if (lm != m) SETERRQ(1,0,"Left scaling vector wrong length"); 1719 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1720 M = ai[i+1] - ai[i]; 1721 li = l + i*bs; 1722 v = aa + bs2*ai[i]; 1723 for ( j=0; j<M; j++ ) { /* for each block */ 1724 for ( k=0; k<bs2; k++ ) { 1725 (*v++) *= li[k%bs]; 1726 } 1727 } 1728 } 1729 } 1730 1731 if (rr) { 1732 VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn); 1733 if (rn != n) SETERRQ(1,0,"Right scaling vector wrong length"); 1734 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1735 M = ai[i+1] - ai[i]; 1736 v = aa + bs2*ai[i]; 1737 for ( j=0; j<M; j++ ) { /* for each block */ 1738 ri = r + bs*aj[ai[i]+j]; 1739 for ( k=0; k<bs; k++ ) { 1740 x = ri[k]; 1741 for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 1742 } 1743 } 1744 } 1745 } 1746 return 0; 1747 } 1748 1749 1750 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 1751 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 1752 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1753 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*); 1754 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**); 1755 1756 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 1757 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 1758 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 1759 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 1760 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 1761 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 1762 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec); 1763 1764 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 1765 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 1766 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 1767 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 1768 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 1769 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 1770 1771 #undef __FUNC__ 1772 #define __FUNC__ "MatNorm_SeqBAIJ" 1773 static int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 1774 { 1775 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1776 Scalar *v = a->a; 1777 double sum = 0.0; 1778 int i,nz=a->nz,bs2=a->bs2; 1779 1780 if (type == NORM_FROBENIUS) { 1781 for (i=0; i< bs2*nz; i++ ) { 1782 #if defined(PETSC_COMPLEX) 1783 sum += real(conj(*v)*(*v)); v++; 1784 #else 1785 sum += (*v)*(*v); v++; 1786 #endif 1787 } 1788 *norm = sqrt(sum); 1789 } 1790 else { 1791 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 1792 } 1793 return 0; 1794 } 1795 1796 /* 1797 note: This can only work for identity for row and col. It would 1798 be good to check this and otherwise generate an error. 1799 */ 1800 #undef __FUNC__ 1801 #define __FUNC__ "MatILUFactor_SeqBAIJ" 1802 static int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 1803 { 1804 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1805 Mat outA; 1806 int ierr; 1807 1808 if (fill != 0) SETERRQ(1,0,"Only fill=0 supported"); 1809 1810 outA = inA; 1811 inA->factor = FACTOR_LU; 1812 a->row = row; 1813 a->col = col; 1814 1815 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1816 1817 if (!a->diag) { 1818 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 1819 } 1820 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 1821 return 0; 1822 } 1823 1824 #undef __FUNC__ 1825 #define __FUNC__ "MatScale_SeqBAIJ" 1826 static int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 1827 { 1828 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1829 int one = 1, totalnz = a->bs2*a->nz; 1830 BLscal_( &totalnz, alpha, a->a, &one ); 1831 PLogFlops(totalnz); 1832 return 0; 1833 } 1834 1835 #undef __FUNC__ 1836 #define __FUNC__ "MatGetValues_SeqBAIJ" 1837 static int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 1838 { 1839 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1840 int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 1841 int *ai = a->i, *ailen = a->ilen; 1842 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 1843 Scalar *ap, *aa = a->a, zero = 0.0; 1844 1845 for ( k=0; k<m; k++ ) { /* loop over rows */ 1846 row = im[k]; brow = row/bs; 1847 if (row < 0) SETERRQ(1,0,"Negative row"); 1848 if (row >= a->m) SETERRQ(1,0,"Row too large"); 1849 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1850 nrow = ailen[brow]; 1851 for ( l=0; l<n; l++ ) { /* loop over columns */ 1852 if (in[l] < 0) SETERRQ(1,0,"Negative column"); 1853 if (in[l] >= a->n) SETERRQ(1,0,"Column too large"); 1854 col = in[l] ; 1855 bcol = col/bs; 1856 cidx = col%bs; 1857 ridx = row%bs; 1858 high = nrow; 1859 low = 0; /* assume unsorted */ 1860 while (high-low > 5) { 1861 t = (low+high)/2; 1862 if (rp[t] > bcol) high = t; 1863 else low = t; 1864 } 1865 for ( i=low; i<high; i++ ) { 1866 if (rp[i] > bcol) break; 1867 if (rp[i] == bcol) { 1868 *v++ = ap[bs2*i+bs*cidx+ridx]; 1869 goto finished; 1870 } 1871 } 1872 *v++ = zero; 1873 finished:; 1874 } 1875 } 1876 return 0; 1877 } 1878 1879 #undef __FUNC__ 1880 #define __FUNC__ "MatGetBlockSize_SeqBAIJ" 1881 static int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs) 1882 { 1883 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data; 1884 *bs = baij->bs; 1885 return 0; 1886 } 1887 1888 /* idx should be of length atlease bs */ 1889 #undef __FUNC__ 1890 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Block" 1891 static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg) 1892 { 1893 int i,row; 1894 row = idx[0]; 1895 if (row%bs!=0) { *flg = PETSC_FALSE; return 0; } 1896 1897 for ( i=1; i<bs; i++ ) { 1898 if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; } 1899 } 1900 *flg = PETSC_TRUE; 1901 return 0; 1902 } 1903 1904 #undef __FUNC__ 1905 #define __FUNC__ "MatZeroRows_SeqBAIJ" 1906 static int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag) 1907 { 1908 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1909 IS is_local; 1910 int ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2; 1911 PetscTruth flg; 1912 Scalar zero = 0.0,*aa; 1913 1914 /* Make a copy of the IS and sort it */ 1915 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 1916 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1917 ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr); 1918 ierr = ISSort(is_local); CHKERRQ(ierr); 1919 ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr); 1920 1921 i = 0; 1922 while (i < is_n) { 1923 if (rows[i]<0 || rows[i]>m) SETERRQ(1,0,"row out of range"); 1924 flg = PETSC_FALSE; 1925 if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); } 1926 if (flg) { /* There exists a block of rows to be Zerowed */ 1927 baij->ilen[rows[i]/bs] = 0; 1928 i += bs; 1929 } else { /* Zero out only the requested row */ 1930 count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs; 1931 aa = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs); 1932 for ( j=0; j<count; j++ ) { 1933 aa[0] = zero; 1934 aa+=bs; 1935 } 1936 i++; 1937 } 1938 } 1939 if (diag) { 1940 for ( j=0; j<is_n; j++ ) { 1941 ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1942 } 1943 } 1944 ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr); 1945 ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr); 1946 ierr = ISDestroy(is_local); CHKERRQ(ierr); 1947 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1948 1949 return 0; 1950 } 1951 1952 #undef __FUNC__ 1953 #define __FUNC__ "MatPrintHelp_SeqBAIJ" 1954 int MatPrintHelp_SeqBAIJ(Mat A) 1955 { 1956 static int called = 0; 1957 MPI_Comm comm = A->comm; 1958 1959 if (called) return 0; else called = 1; 1960 PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n"); 1961 PetscPrintf(comm," -mat_block_size <block_size>\n"); 1962 return 0; 1963 } 1964 1965 /* -------------------------------------------------------------------*/ 1966 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 1967 MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 1968 MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N, 1969 MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ, 1970 MatSolve_SeqBAIJ_N,0, 1971 0,0, 1972 MatLUFactor_SeqBAIJ,0, 1973 0, 1974 MatTranspose_SeqBAIJ, 1975 MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 1976 MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 1977 0,MatAssemblyEnd_SeqBAIJ, 1978 0, 1979 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ, 1980 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 1981 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 1982 MatILUFactorSymbolic_SeqBAIJ,0, 1983 0,0, 1984 MatConvertSameType_SeqBAIJ,0,0, 1985 MatILUFactor_SeqBAIJ,0,0, 1986 MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ, 1987 MatGetValues_SeqBAIJ,0, 1988 MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 1989 0,0,0,MatGetBlockSize_SeqBAIJ, 1990 MatGetRowIJ_SeqBAIJ, 1991 MatRestoreRowIJ_SeqBAIJ, 1992 0,0,0,0,0,0, 1993 MatSetValuesBlocked_SeqBAIJ}; 1994 1995 #undef __FUNC__ 1996 #define __FUNC__ "MatCreateSeqBAIJ" 1997 /*@C 1998 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1999 compressed row) format. For good matrix assembly performance the 2000 user should preallocate the matrix storage by setting the parameter nz 2001 (or the array nzz). By setting these parameters accurately, performance 2002 during matrix assembly can be increased by more than a factor of 50. 2003 2004 Input Parameters: 2005 . comm - MPI communicator, set to MPI_COMM_SELF 2006 . bs - size of block 2007 . m - number of rows 2008 . n - number of columns 2009 . nz - number of block nonzeros per block row (same for all rows) 2010 . nzz - array containing the number of block nonzeros in the various block rows 2011 (possibly different for each block row) or PETSC_NULL 2012 2013 Output Parameter: 2014 . A - the matrix 2015 2016 Options Database Keys: 2017 $ -mat_no_unroll - uses code that does not unroll the loops in the 2018 $ block calculations (much solver) 2019 $ -mat_block_size - size of the blocks to use 2020 2021 Notes: 2022 The block AIJ format is fully compatible with standard Fortran 77 2023 storage. That is, the stored row and column indices can begin at 2024 either one (as in Fortran) or zero. See the users' manual for details. 2025 2026 Specify the preallocated storage with either nz or nnz (not both). 2027 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2028 allocation. For additional details, see the users manual chapter on 2029 matrices. 2030 2031 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2032 @*/ 2033 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 2034 { 2035 Mat B; 2036 Mat_SeqBAIJ *b; 2037 int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size; 2038 2039 MPI_Comm_size(comm,&size); 2040 if (size > 1) SETERRQ(1,0,"Comm must be of size 1"); 2041 2042 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr); 2043 2044 if (mbs*bs!=m || nbs*bs!=n) 2045 SETERRQ(1,0,"Number rows, cols must be divisible by blocksize"); 2046 2047 *A = 0; 2048 PetscHeaderCreate(B,_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 2049 PLogObjectCreate(B); 2050 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 2051 PetscMemzero(b,sizeof(Mat_SeqBAIJ)); 2052 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 2053 ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr); 2054 if (!flg) { 2055 switch (bs) { 2056 case 1: 2057 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2058 B->ops.solve = MatSolve_SeqBAIJ_1; 2059 B->ops.mult = MatMult_SeqBAIJ_1; 2060 B->ops.multadd = MatMultAdd_SeqBAIJ_1; 2061 break; 2062 case 2: 2063 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2064 B->ops.solve = MatSolve_SeqBAIJ_2; 2065 B->ops.mult = MatMult_SeqBAIJ_2; 2066 B->ops.multadd = MatMultAdd_SeqBAIJ_2; 2067 break; 2068 case 3: 2069 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2070 B->ops.solve = MatSolve_SeqBAIJ_3; 2071 B->ops.mult = MatMult_SeqBAIJ_3; 2072 B->ops.multadd = MatMultAdd_SeqBAIJ_3; 2073 break; 2074 case 4: 2075 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2076 B->ops.solve = MatSolve_SeqBAIJ_4; 2077 B->ops.mult = MatMult_SeqBAIJ_4; 2078 B->ops.multadd = MatMultAdd_SeqBAIJ_4; 2079 break; 2080 case 5: 2081 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2082 B->ops.solve = MatSolve_SeqBAIJ_5; 2083 B->ops.mult = MatMult_SeqBAIJ_5; 2084 B->ops.multadd = MatMultAdd_SeqBAIJ_5; 2085 break; 2086 case 7: 2087 B->ops.mult = MatMult_SeqBAIJ_7; 2088 B->ops.solve = MatSolve_SeqBAIJ_7; 2089 B->ops.multadd = MatMultAdd_SeqBAIJ_7; 2090 break; 2091 } 2092 } 2093 B->destroy = MatDestroy_SeqBAIJ; 2094 B->view = MatView_SeqBAIJ; 2095 B->factor = 0; 2096 B->lupivotthreshold = 1.0; 2097 B->mapping = 0; 2098 b->row = 0; 2099 b->col = 0; 2100 b->reallocs = 0; 2101 2102 b->m = m; B->m = m; B->M = m; 2103 b->n = n; B->n = n; B->N = n; 2104 b->mbs = mbs; 2105 b->nbs = nbs; 2106 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 2107 if (nnz == PETSC_NULL) { 2108 if (nz == PETSC_DEFAULT) nz = 5; 2109 else if (nz <= 0) nz = 1; 2110 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 2111 nz = nz*mbs; 2112 } 2113 else { 2114 nz = 0; 2115 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 2116 } 2117 2118 /* allocate the matrix space */ 2119 len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 2120 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 2121 PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 2122 b->j = (int *) (b->a + nz*bs2); 2123 PetscMemzero(b->j,nz*sizeof(int)); 2124 b->i = b->j + nz; 2125 b->singlemalloc = 1; 2126 2127 b->i[0] = 0; 2128 for (i=1; i<mbs+1; i++) { 2129 b->i[i] = b->i[i-1] + b->imax[i-1]; 2130 } 2131 2132 /* b->ilen will count nonzeros in each block row so far. */ 2133 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 2134 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 2135 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 2136 2137 b->bs = bs; 2138 b->bs2 = bs2; 2139 b->mbs = mbs; 2140 b->nz = 0; 2141 b->maxnz = nz*bs2; 2142 b->sorted = 0; 2143 b->roworiented = 1; 2144 b->nonew = 0; 2145 b->diag = 0; 2146 b->solve_work = 0; 2147 b->mult_work = 0; 2148 b->spptr = 0; 2149 B->info.nz_unneeded = (double)b->maxnz; 2150 2151 *A = B; 2152 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 2153 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 2154 return 0; 2155 } 2156 2157 #undef __FUNC__ 2158 #define __FUNC__ "MatConvertSameType_SeqBAIJ" 2159 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 2160 { 2161 Mat C; 2162 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 2163 int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 2164 2165 if (a->i[mbs] != nz) SETERRQ(1,0,"Corrupt matrix"); 2166 2167 *B = 0; 2168 PetscHeaderCreate(C,_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 2169 PLogObjectCreate(C); 2170 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 2171 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 2172 C->destroy = MatDestroy_SeqBAIJ; 2173 C->view = MatView_SeqBAIJ; 2174 C->factor = A->factor; 2175 c->row = 0; 2176 c->col = 0; 2177 C->assembled = PETSC_TRUE; 2178 2179 c->m = C->m = a->m; 2180 c->n = C->n = a->n; 2181 C->M = a->m; 2182 C->N = a->n; 2183 2184 c->bs = a->bs; 2185 c->bs2 = a->bs2; 2186 c->mbs = a->mbs; 2187 c->nbs = a->nbs; 2188 2189 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 2190 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 2191 for ( i=0; i<mbs; i++ ) { 2192 c->imax[i] = a->imax[i]; 2193 c->ilen[i] = a->ilen[i]; 2194 } 2195 2196 /* allocate the matrix space */ 2197 c->singlemalloc = 1; 2198 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 2199 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 2200 c->j = (int *) (c->a + nz*bs2); 2201 c->i = c->j + nz; 2202 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 2203 if (mbs > 0) { 2204 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 2205 if (cpvalues == COPY_VALUES) { 2206 PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 2207 } 2208 } 2209 2210 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _Mat)+sizeof(Mat_SeqBAIJ)); 2211 c->sorted = a->sorted; 2212 c->roworiented = a->roworiented; 2213 c->nonew = a->nonew; 2214 2215 if (a->diag) { 2216 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 2217 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 2218 for ( i=0; i<mbs; i++ ) { 2219 c->diag[i] = a->diag[i]; 2220 } 2221 } 2222 else c->diag = 0; 2223 c->nz = a->nz; 2224 c->maxnz = a->maxnz; 2225 c->solve_work = 0; 2226 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 2227 c->mult_work = 0; 2228 *B = C; 2229 return 0; 2230 } 2231 2232 #undef __FUNC__ 2233 #define __FUNC__ "MatLoad_SeqBAIJ" 2234 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 2235 { 2236 Mat_SeqBAIJ *a; 2237 Mat B; 2238 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 2239 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2240 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 2241 int *masked, nmask,tmp,bs2,ishift; 2242 Scalar *aa; 2243 MPI_Comm comm = ((PetscObject) viewer)->comm; 2244 2245 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2246 bs2 = bs*bs; 2247 2248 MPI_Comm_size(comm,&size); 2249 if (size > 1) SETERRQ(1,0,"view must have one processor"); 2250 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2251 ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 2252 if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not Mat object"); 2253 M = header[1]; N = header[2]; nz = header[3]; 2254 2255 if (M != N) SETERRQ(1,0,"Can only do square matrices"); 2256 2257 /* 2258 This code adds extra rows to make sure the number of rows is 2259 divisible by the blocksize 2260 */ 2261 mbs = M/bs; 2262 extra_rows = bs - M + bs*(mbs); 2263 if (extra_rows == bs) extra_rows = 0; 2264 else mbs++; 2265 if (extra_rows) { 2266 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 2267 } 2268 2269 /* read in row lengths */ 2270 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 2271 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 2272 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 2273 2274 /* read in column indices */ 2275 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 2276 ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 2277 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 2278 2279 /* loop over row lengths determining block row lengths */ 2280 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 2281 PetscMemzero(browlengths,mbs*sizeof(int)); 2282 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 2283 PetscMemzero(mask,mbs*sizeof(int)); 2284 masked = mask + mbs; 2285 rowcount = 0; nzcount = 0; 2286 for ( i=0; i<mbs; i++ ) { 2287 nmask = 0; 2288 for ( j=0; j<bs; j++ ) { 2289 kmax = rowlengths[rowcount]; 2290 for ( k=0; k<kmax; k++ ) { 2291 tmp = jj[nzcount++]/bs; 2292 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2293 } 2294 rowcount++; 2295 } 2296 browlengths[i] += nmask; 2297 /* zero out the mask elements we set */ 2298 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2299 } 2300 2301 /* create our matrix */ 2302 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 2303 CHKERRQ(ierr); 2304 B = *A; 2305 a = (Mat_SeqBAIJ *) B->data; 2306 2307 /* set matrix "i" values */ 2308 a->i[0] = 0; 2309 for ( i=1; i<= mbs; i++ ) { 2310 a->i[i] = a->i[i-1] + browlengths[i-1]; 2311 a->ilen[i-1] = browlengths[i-1]; 2312 } 2313 a->nz = 0; 2314 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 2315 2316 /* read in nonzero values */ 2317 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 2318 ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 2319 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 2320 2321 /* set "a" and "j" values into matrix */ 2322 nzcount = 0; jcount = 0; 2323 for ( i=0; i<mbs; i++ ) { 2324 nzcountb = nzcount; 2325 nmask = 0; 2326 for ( j=0; j<bs; j++ ) { 2327 kmax = rowlengths[i*bs+j]; 2328 for ( k=0; k<kmax; k++ ) { 2329 tmp = jj[nzcount++]/bs; 2330 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2331 } 2332 rowcount++; 2333 } 2334 /* sort the masked values */ 2335 PetscSortInt(nmask,masked); 2336 2337 /* set "j" values into matrix */ 2338 maskcount = 1; 2339 for ( j=0; j<nmask; j++ ) { 2340 a->j[jcount++] = masked[j]; 2341 mask[masked[j]] = maskcount++; 2342 } 2343 /* set "a" values into matrix */ 2344 ishift = bs2*a->i[i]; 2345 for ( j=0; j<bs; j++ ) { 2346 kmax = rowlengths[i*bs+j]; 2347 for ( k=0; k<kmax; k++ ) { 2348 tmp = jj[nzcountb]/bs ; 2349 block = mask[tmp] - 1; 2350 point = jj[nzcountb] - bs*tmp; 2351 idx = ishift + bs2*block + j + bs*point; 2352 a->a[idx] = aa[nzcountb++]; 2353 } 2354 } 2355 /* zero out the mask elements we set */ 2356 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2357 } 2358 if (jcount != a->nz) SETERRQ(1,0,"Error bad binary matrix"); 2359 2360 PetscFree(rowlengths); 2361 PetscFree(browlengths); 2362 PetscFree(aa); 2363 PetscFree(jj); 2364 PetscFree(mask); 2365 2366 B->assembled = PETSC_TRUE; 2367 2368 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 2369 if (flg) { 2370 Viewer tviewer; 2371 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2372 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO,0);CHKERRQ(ierr); 2373 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2374 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2375 } 2376 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 2377 if (flg) { 2378 Viewer tviewer; 2379 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2380 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO_LONG,0);CHKERRQ(ierr); 2381 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2382 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2383 } 2384 ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 2385 if (flg) { 2386 Viewer tviewer; 2387 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2388 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2389 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2390 } 2391 ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 2392 if (flg) { 2393 Viewer tviewer; 2394 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2395 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_MATLAB,"M");CHKERRQ(ierr); 2396 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2397 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2398 } 2399 ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 2400 if (flg) { 2401 Viewer tviewer; 2402 ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 2403 ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 2404 ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 2405 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2406 } 2407 return 0; 2408 } 2409 2410 2411 2412