1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: baij.c,v 1.107 1997/07/09 20:55:07 balay Exp bsmith $"; 3 #endif 4 5 /* 6 Defines the basic matrix operations for the BAIJ (compressed row) 7 matrix storage format. 8 */ 9 10 #include "pinclude/pviewer.h" 11 #include "sys.h" 12 #include "src/mat/impls/baij/seq/baij.h" 13 #include "src/vec/vecimpl.h" 14 #include "src/inline/spops.h" 15 #include "petsc.h" 16 17 18 /* 19 Adds diagonal pointers to sparse matrix structure. 20 */ 21 22 #undef __FUNC__ 23 #define __FUNC__ "MatMarkDiag_SeqBAIJ" 24 int MatMarkDiag_SeqBAIJ(Mat A) 25 { 26 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 27 int i,j, *diag, m = a->mbs; 28 29 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 30 PLogObjectMemory(A,(m+1)*sizeof(int)); 31 for ( i=0; i<m; i++ ) { 32 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 33 if (a->j[j] == i) { 34 diag[i] = j; 35 break; 36 } 37 } 38 } 39 a->diag = diag; 40 return 0; 41 } 42 43 44 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 45 46 #undef __FUNC__ 47 #define __FUNC__ "MatGetRowIJ_SeqBAIJ" 48 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 49 PetscTruth *done) 50 { 51 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 52 int ierr, n = a->mbs,i; 53 54 *nn = n; 55 if (!ia) return 0; 56 if (symmetric) { 57 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 58 } else if (oshift == 1) { 59 /* temporarily add 1 to i and j indices */ 60 int nz = a->i[n] + 1; 61 for ( i=0; i<nz; i++ ) a->j[i]++; 62 for ( i=0; i<n+1; i++ ) a->i[i]++; 63 *ia = a->i; *ja = a->j; 64 } else { 65 *ia = a->i; *ja = a->j; 66 } 67 68 return 0; 69 } 70 71 #undef __FUNC__ 72 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ" /* ADIC Ignore */ 73 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 74 PetscTruth *done) 75 { 76 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 77 int i,n = a->mbs; 78 79 if (!ia) return 0; 80 if (symmetric) { 81 PetscFree(*ia); 82 PetscFree(*ja); 83 } else if (oshift == 1) { 84 int nz = a->i[n]; 85 for ( i=0; i<nz; i++ ) a->j[i]--; 86 for ( i=0; i<n+1; i++ ) a->i[i]--; 87 } 88 return 0; 89 } 90 91 92 #undef __FUNC__ 93 #define __FUNC__ "MatView_SeqBAIJ_Binary" /* ADIC Ignore */ 94 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 95 { 96 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 97 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 98 Scalar *aa; 99 100 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 101 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 102 col_lens[0] = MAT_COOKIE; 103 104 col_lens[1] = a->m; 105 col_lens[2] = a->n; 106 col_lens[3] = a->nz*bs2; 107 108 /* store lengths of each row and write (including header) to file */ 109 count = 0; 110 for ( i=0; i<a->mbs; i++ ) { 111 for ( j=0; j<bs; j++ ) { 112 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 113 } 114 } 115 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 116 PetscFree(col_lens); 117 118 /* store column indices (zero start index) */ 119 jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) ); CHKPTRQ(jj); 120 count = 0; 121 for ( i=0; i<a->mbs; i++ ) { 122 for ( j=0; j<bs; j++ ) { 123 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 124 for ( l=0; l<bs; l++ ) { 125 jj[count++] = bs*a->j[k] + l; 126 } 127 } 128 } 129 } 130 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr); 131 PetscFree(jj); 132 133 /* store nonzero values */ 134 aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar)); CHKPTRQ(aa); 135 count = 0; 136 for ( i=0; i<a->mbs; i++ ) { 137 for ( j=0; j<bs; j++ ) { 138 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 139 for ( l=0; l<bs; l++ ) { 140 aa[count++] = a->a[bs2*k + l*bs + j]; 141 } 142 } 143 } 144 } 145 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 146 PetscFree(aa); 147 return 0; 148 } 149 150 #undef __FUNC__ 151 #define __FUNC__ "MatView_SeqBAIJ_ASCII" /* ADIC Ignore */ 152 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 153 { 154 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 155 int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 156 FILE *fd; 157 char *outputname; 158 159 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 160 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 161 ierr = ViewerGetFormat(viewer,&format); 162 if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) { 163 fprintf(fd," block size is %d\n",bs); 164 } 165 else if (format == VIEWER_FORMAT_ASCII_MATLAB) { 166 SETERRQ(1,0,"Matlab format not supported"); 167 } 168 else if (format == VIEWER_FORMAT_ASCII_COMMON) { 169 for ( i=0; i<a->mbs; i++ ) { 170 for ( j=0; j<bs; j++ ) { 171 fprintf(fd,"row %d:",i*bs+j); 172 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 173 for ( l=0; l<bs; l++ ) { 174 #if defined(PETSC_COMPLEX) 175 if (imag(a->a[bs2*k + l*bs + j]) > 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 176 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 177 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 178 else if (imag(a->a[bs2*k + l*bs + j]) < 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 179 fprintf(fd," %d %g - %g i",bs*a->j[k]+l, 180 real(a->a[bs2*k + l*bs + j]),-imag(a->a[bs2*k + l*bs + j])); 181 else if (real(a->a[bs2*k + l*bs + j]) != 0.0) 182 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 183 #else 184 if (a->a[bs2*k + l*bs + j] != 0.0) 185 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 186 #endif 187 } 188 } 189 fprintf(fd,"\n"); 190 } 191 } 192 } 193 else { 194 for ( i=0; i<a->mbs; i++ ) { 195 for ( j=0; j<bs; j++ ) { 196 fprintf(fd,"row %d:",i*bs+j); 197 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 198 for ( l=0; l<bs; l++ ) { 199 #if defined(PETSC_COMPLEX) 200 if (imag(a->a[bs2*k + l*bs + j]) > 0.0) { 201 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 202 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 203 } 204 else if (imag(a->a[bs2*k + l*bs + j]) < 0.0) { 205 fprintf(fd," %d %g - %g i",bs*a->j[k]+l, 206 real(a->a[bs2*k + l*bs + j]),-imag(a->a[bs2*k + l*bs + j])); 207 } 208 else { 209 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 210 } 211 #else 212 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 213 #endif 214 } 215 } 216 fprintf(fd,"\n"); 217 } 218 } 219 } 220 fflush(fd); 221 return 0; 222 } 223 224 #undef __FUNC__ 225 #define __FUNC__ "MatView_SeqBAIJ_Draw" /* ADIC Ignore */ 226 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer) 227 { 228 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 229 int row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2; 230 double xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r; 231 Scalar *aa; 232 Draw draw; 233 DrawButton button; 234 PetscTruth isnull; 235 236 ViewerDrawGetDraw(viewer,&draw); 237 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 238 239 xr = a->n; yr = a->m; h = yr/10.0; w = xr/10.0; 240 xr += w; yr += h; xl = -w; yl = -h; 241 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 242 /* loop over matrix elements drawing boxes */ 243 color = DRAW_BLUE; 244 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 245 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 246 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 247 x_l = a->j[j]*bs; x_r = x_l + 1.0; 248 aa = a->a + j*bs2; 249 for ( k=0; k<bs; k++ ) { 250 for ( l=0; l<bs; l++ ) { 251 if (PetscReal(*aa++) >= 0.) continue; 252 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 253 } 254 } 255 } 256 } 257 color = DRAW_CYAN; 258 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 259 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 260 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 261 x_l = a->j[j]*bs; x_r = x_l + 1.0; 262 aa = a->a + j*bs2; 263 for ( k=0; k<bs; k++ ) { 264 for ( l=0; l<bs; l++ ) { 265 if (PetscReal(*aa++) != 0.) continue; 266 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 267 } 268 } 269 } 270 } 271 272 color = DRAW_RED; 273 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 274 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 275 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 276 x_l = a->j[j]*bs; x_r = x_l + 1.0; 277 aa = a->a + j*bs2; 278 for ( k=0; k<bs; k++ ) { 279 for ( l=0; l<bs; l++ ) { 280 if (PetscReal(*aa++) <= 0.) continue; 281 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 282 } 283 } 284 } 285 } 286 287 DrawFlush(draw); 288 DrawGetPause(draw,&pause); 289 if (pause >= 0) { PetscSleep(pause); return 0;} 290 291 /* allow the matrix to zoom or shrink */ 292 ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 293 while (button != BUTTON_RIGHT) { 294 DrawClear(draw); 295 if (button == BUTTON_LEFT) scale = .5; 296 else if (button == BUTTON_CENTER) scale = 2.; 297 xl = scale*(xl + w - xc) + xc - w*scale; 298 xr = scale*(xr - w - xc) + xc + w*scale; 299 yl = scale*(yl + h - yc) + yc - h*scale; 300 yr = scale*(yr - h - yc) + yc + h*scale; 301 w *= scale; h *= scale; 302 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 303 color = DRAW_BLUE; 304 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 305 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 306 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 307 x_l = a->j[j]*bs; x_r = x_l + 1.0; 308 aa = a->a + j*bs2; 309 for ( k=0; k<bs; k++ ) { 310 for ( l=0; l<bs; l++ ) { 311 if (PetscReal(*aa++) >= 0.) continue; 312 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 313 } 314 } 315 } 316 } 317 color = DRAW_CYAN; 318 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 319 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 320 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 321 x_l = a->j[j]*bs; x_r = x_l + 1.0; 322 aa = a->a + j*bs2; 323 for ( k=0; k<bs; k++ ) { 324 for ( l=0; l<bs; l++ ) { 325 if (PetscReal(*aa++) != 0.) continue; 326 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 327 } 328 } 329 } 330 } 331 332 color = DRAW_RED; 333 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 334 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 335 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 336 x_l = a->j[j]*bs; x_r = x_l + 1.0; 337 aa = a->a + j*bs2; 338 for ( k=0; k<bs; k++ ) { 339 for ( l=0; l<bs; l++ ) { 340 if (PetscReal(*aa++) <= 0.) continue; 341 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 342 } 343 } 344 } 345 } 346 ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 347 } 348 return 0; 349 } 350 351 #undef __FUNC__ 352 #define __FUNC__ "MatView_SeqBAIJ" /* ADIC Ignore */ 353 int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 354 { 355 Mat A = (Mat) obj; 356 ViewerType vtype; 357 int ierr; 358 359 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 360 if (vtype == MATLAB_VIEWER) { 361 SETERRQ(1,0,"Matlab viewer not supported"); 362 } 363 else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 364 return MatView_SeqBAIJ_ASCII(A,viewer); 365 } 366 else if (vtype == BINARY_FILE_VIEWER) { 367 return MatView_SeqBAIJ_Binary(A,viewer); 368 } 369 else if (vtype == DRAW_VIEWER) { 370 return MatView_SeqBAIJ_Draw(A,viewer); 371 } 372 return 0; 373 } 374 375 #define CHUNKSIZE 10 376 377 #undef __FUNC__ 378 #define __FUNC__ "MatSetValues_SeqBAIJ" 379 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 380 { 381 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 382 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 383 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 384 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 385 int ridx,cidx,bs2=a->bs2; 386 Scalar *ap,value,*aa=a->a,*bap; 387 388 for ( k=0; k<m; k++ ) { /* loop over added rows */ 389 row = im[k]; brow = row/bs; 390 #if defined(PETSC_BOPT_g) 391 if (row < 0) SETERRQ(1,0,"Negative row"); 392 if (row >= a->m) SETERRQ(1,0,"Row too large"); 393 #endif 394 rp = aj + ai[brow]; 395 ap = aa + bs2*ai[brow]; 396 rmax = imax[brow]; 397 nrow = ailen[brow]; 398 low = 0; 399 for ( l=0; l<n; l++ ) { /* loop over added columns */ 400 #if defined(PETSC_BOPT_g) 401 if (in[l] < 0) SETERRQ(1,0,"Negative column"); 402 if (in[l] >= a->n) SETERRQ(1,0,"Column too large"); 403 #endif 404 col = in[l]; bcol = col/bs; 405 ridx = row % bs; cidx = col % bs; 406 if (roworiented) { 407 value = *v++; 408 } 409 else { 410 value = v[k + l*m]; 411 } 412 if (!sorted) low = 0; high = nrow; 413 while (high-low > 7) { 414 t = (low+high)/2; 415 if (rp[t] > bcol) high = t; 416 else low = t; 417 } 418 for ( i=low; i<high; i++ ) { 419 if (rp[i] > bcol) break; 420 if (rp[i] == bcol) { 421 bap = ap + bs2*i + bs*cidx + ridx; 422 if (is == ADD_VALUES) *bap += value; 423 else *bap = value; 424 goto noinsert1; 425 } 426 } 427 if (nonew == 1) goto noinsert1; 428 else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 429 if (nrow >= rmax) { 430 /* there is no extra room in row, therefore enlarge */ 431 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 432 Scalar *new_a; 433 434 if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 435 436 /* Malloc new storage space */ 437 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 438 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 439 new_j = (int *) (new_a + bs2*new_nz); 440 new_i = new_j + new_nz; 441 442 /* copy over old data into new slots */ 443 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 444 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 445 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); 446 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 447 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, 448 len*sizeof(int)); 449 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); 450 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 451 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), 452 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); 453 /* free up old matrix storage */ 454 PetscFree(a->a); 455 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 456 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 457 a->singlemalloc = 1; 458 459 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 460 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 461 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 462 a->maxnz += bs2*CHUNKSIZE; 463 a->reallocs++; 464 a->nz++; 465 } 466 N = nrow++ - 1; 467 /* shift up all the later entries in this row */ 468 for ( ii=N; ii>=i; ii-- ) { 469 rp[ii+1] = rp[ii]; 470 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 471 } 472 if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 473 rp[i] = bcol; 474 ap[bs2*i + bs*cidx + ridx] = value; 475 noinsert1:; 476 low = i; 477 } 478 ailen[brow] = nrow; 479 } 480 return 0; 481 } 482 483 #undef __FUNC__ 484 #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ" 485 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 486 { 487 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 488 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 489 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 490 int roworiented=a->roworiented; 491 int *aj=a->j,nonew=a->nonew; 492 int bs2=a->bs2,bs=a->bs,stepval; 493 Scalar *ap,*value=v,*aa=a->a,*bap; 494 495 if (roworiented) { 496 stepval = (n-1)*bs; 497 } else { 498 stepval = (m-1)*bs; 499 } 500 for ( k=0; k<m; k++ ) { /* loop over added rows */ 501 row = im[k]; 502 #if defined(PETSC_BOPT_g) 503 if (row < 0) SETERRQ(1,0,"Negative row"); 504 if (row >= a->mbs) SETERRQ(1,0,"Row too large"); 505 #endif 506 rp = aj + ai[row]; 507 ap = aa + bs2*ai[row]; 508 rmax = imax[row]; 509 nrow = ailen[row]; 510 low = 0; 511 for ( l=0; l<n; l++ ) { /* loop over added columns */ 512 #if defined(PETSC_BOPT_g) 513 if (in[l] < 0) SETERRQ(1,0,"Negative column"); 514 if (in[l] >= a->nbs) SETERRQ(1,0,"Column too large"); 515 #endif 516 col = in[l]; 517 if (roworiented) { 518 value = v + k*(stepval+bs)*bs + l*bs; 519 } else { 520 value = v + l*(stepval+bs)*bs + k*bs; 521 } 522 if (!sorted) low = 0; high = nrow; 523 while (high-low > 7) { 524 t = (low+high)/2; 525 if (rp[t] > col) high = t; 526 else low = t; 527 } 528 for ( i=low; i<high; i++ ) { 529 if (rp[i] > col) break; 530 if (rp[i] == col) { 531 bap = ap + bs2*i; 532 if (roworiented) { 533 if (is == ADD_VALUES) { 534 for ( ii=0; ii<bs; ii++,value+=stepval ) 535 for (jj=ii; jj<bs2; jj+=bs ) 536 bap[jj] += *value++; 537 } else { 538 for ( ii=0; ii<bs; ii++,value+=stepval ) 539 for (jj=ii; jj<bs2; jj+=bs ) 540 bap[jj] = *value++; 541 } 542 } else { 543 if (is == ADD_VALUES) { 544 for ( ii=0; ii<bs; ii++,value+=stepval ) 545 for (jj=0; jj<bs; jj++ ) 546 *bap++ += *value++; 547 } else { 548 for ( ii=0; ii<bs; ii++,value+=stepval ) 549 for (jj=0; jj<bs; jj++ ) 550 *bap++ = *value++; 551 } 552 } 553 goto noinsert2; 554 } 555 } 556 if (nonew == 1) goto noinsert2; 557 else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 558 if (nrow >= rmax) { 559 /* there is no extra room in row, therefore enlarge */ 560 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 561 Scalar *new_a; 562 563 if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 564 565 /* malloc new storage space */ 566 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 567 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 568 new_j = (int *) (new_a + bs2*new_nz); 569 new_i = new_j + new_nz; 570 571 /* copy over old data into new slots */ 572 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} 573 for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 574 PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int)); 575 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 576 PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow, 577 len*sizeof(int)); 578 PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(Scalar)); 579 PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 580 PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE), 581 aa+bs2*(ai[row]+nrow),bs2*len*sizeof(Scalar)); 582 /* free up old matrix storage */ 583 PetscFree(a->a); 584 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 585 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 586 a->singlemalloc = 1; 587 588 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 589 rmax = imax[row] = imax[row] + CHUNKSIZE; 590 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 591 a->maxnz += bs2*CHUNKSIZE; 592 a->reallocs++; 593 a->nz++; 594 } 595 N = nrow++ - 1; 596 /* shift up all the later entries in this row */ 597 for ( ii=N; ii>=i; ii-- ) { 598 rp[ii+1] = rp[ii]; 599 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 600 } 601 if (N >= i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 602 rp[i] = col; 603 bap = ap + bs2*i; 604 if (roworiented) { 605 for ( ii=0; ii<bs; ii++,value+=stepval) 606 for (jj=ii; jj<bs2; jj+=bs ) 607 bap[jj] = *value++; 608 } else { 609 for ( ii=0; ii<bs; ii++,value+=stepval ) 610 for (jj=0; jj<bs; jj++ ) 611 *bap++ = *value++; 612 } 613 noinsert2:; 614 low = i; 615 } 616 ailen[row] = nrow; 617 } 618 return 0; 619 } 620 621 #undef __FUNC__ 622 #define __FUNC__ "MatGetSize_SeqBAIJ" /* ADIC Ignore */ 623 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 624 { 625 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 626 *m = a->m; *n = a->n; 627 return 0; 628 } 629 630 #undef __FUNC__ 631 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ" /* ADIC Ignore */ 632 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 633 { 634 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 635 *m = 0; *n = a->m; 636 return 0; 637 } 638 639 #undef __FUNC__ 640 #define __FUNC__ "MatGetRow_SeqBAIJ" 641 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 642 { 643 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 644 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 645 Scalar *aa,*v_i,*aa_i; 646 647 bs = a->bs; 648 ai = a->i; 649 aj = a->j; 650 aa = a->a; 651 bs2 = a->bs2; 652 653 if (row < 0 || row >= a->m) SETERRQ(1,0,"Row out of range"); 654 655 bn = row/bs; /* Block number */ 656 bp = row % bs; /* Block Position */ 657 M = ai[bn+1] - ai[bn]; 658 *nz = bs*M; 659 660 if (v) { 661 *v = 0; 662 if (*nz) { 663 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 664 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 665 v_i = *v + i*bs; 666 aa_i = aa + bs2*(ai[bn] + i); 667 for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 668 } 669 } 670 } 671 672 if (idx) { 673 *idx = 0; 674 if (*nz) { 675 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 676 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 677 idx_i = *idx + i*bs; 678 itmp = bs*aj[ai[bn] + i]; 679 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 680 } 681 } 682 } 683 return 0; 684 } 685 686 #undef __FUNC__ 687 #define __FUNC__ "MatRestoreRow_SeqBAIJ" /* ADIC Ignore */ 688 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 689 { 690 if (idx) {if (*idx) PetscFree(*idx);} 691 if (v) {if (*v) PetscFree(*v);} 692 return 0; 693 } 694 695 #undef __FUNC__ 696 #define __FUNC__ "MatTranspose_SeqBAIJ" 697 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 698 { 699 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 700 Mat C; 701 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 702 int *rows,*cols,bs2=a->bs2; 703 Scalar *array=a->a; 704 705 if (B==PETSC_NULL && mbs!=nbs) 706 SETERRQ(1,0,"Square matrix only for in-place"); 707 col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 708 PetscMemzero(col,(1+nbs)*sizeof(int)); 709 710 for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 711 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 712 PetscFree(col); 713 rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 714 cols = rows + bs; 715 for ( i=0; i<mbs; i++ ) { 716 cols[0] = i*bs; 717 for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 718 len = ai[i+1] - ai[i]; 719 for ( j=0; j<len; j++ ) { 720 rows[0] = (*aj++)*bs; 721 for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 722 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 723 array += bs2; 724 } 725 } 726 PetscFree(rows); 727 728 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 729 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 730 731 if (B != PETSC_NULL) { 732 *B = C; 733 } else { 734 /* This isn't really an in-place transpose */ 735 PetscFree(a->a); 736 if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 737 if (a->diag) PetscFree(a->diag); 738 if (a->ilen) PetscFree(a->ilen); 739 if (a->imax) PetscFree(a->imax); 740 if (a->solve_work) PetscFree(a->solve_work); 741 PetscFree(a); 742 PetscMemcpy(A,C,sizeof(struct _p_Mat)); 743 PetscHeaderDestroy(C); 744 } 745 return 0; 746 } 747 748 749 #undef __FUNC__ 750 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ" 751 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 752 { 753 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 754 int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 755 int m = a->m,*ip, N, *ailen = a->ilen; 756 int mbs = a->mbs, bs2 = a->bs2,rmax = 0; 757 Scalar *aa = a->a, *ap; 758 759 if (mode == MAT_FLUSH_ASSEMBLY) return 0; 760 761 if (m) rmax = ailen[0]; 762 for ( i=1; i<mbs; i++ ) { 763 /* move each row back by the amount of empty slots (fshift) before it*/ 764 fshift += imax[i-1] - ailen[i-1]; 765 rmax = PetscMax(rmax,ailen[i]); 766 if (fshift) { 767 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 768 N = ailen[i]; 769 for ( j=0; j<N; j++ ) { 770 ip[j-fshift] = ip[j]; 771 PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 772 } 773 } 774 ai[i] = ai[i-1] + ailen[i-1]; 775 } 776 if (mbs) { 777 fshift += imax[mbs-1] - ailen[mbs-1]; 778 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 779 } 780 /* reset ilen and imax for each row */ 781 for ( i=0; i<mbs; i++ ) { 782 ailen[i] = imax[i] = ai[i+1] - ai[i]; 783 } 784 a->nz = ai[mbs]; 785 786 /* diagonals may have moved, so kill the diagonal pointers */ 787 if (fshift && a->diag) { 788 PetscFree(a->diag); 789 PLogObjectMemory(A,-(m+1)*sizeof(int)); 790 a->diag = 0; 791 } 792 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 793 m,a->n,a->bs,fshift*bs2,a->nz*bs2); 794 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 795 a->reallocs); 796 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 797 a->reallocs = 0; 798 A->info.nz_unneeded = (double)fshift*bs2; 799 800 return 0; 801 } 802 803 #undef __FUNC__ 804 #define __FUNC__ "MatZeroEntries_SeqBAIJ" 805 int MatZeroEntries_SeqBAIJ(Mat A) 806 { 807 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 808 PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar)); 809 return 0; 810 } 811 812 #undef __FUNC__ 813 #define __FUNC__ "MatDestroy_SeqBAIJ" /* ADIC Ignore */ 814 int MatDestroy_SeqBAIJ(PetscObject obj) 815 { 816 Mat A = (Mat) obj; 817 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 818 819 #if defined(PETSC_LOG) 820 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 821 #endif 822 PetscFree(a->a); 823 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 824 if (a->diag) PetscFree(a->diag); 825 if (a->ilen) PetscFree(a->ilen); 826 if (a->imax) PetscFree(a->imax); 827 if (a->solve_work) PetscFree(a->solve_work); 828 if (a->mult_work) PetscFree(a->mult_work); 829 PetscFree(a); 830 PLogObjectDestroy(A); 831 PetscHeaderDestroy(A); 832 return 0; 833 } 834 835 #undef __FUNC__ 836 #define __FUNC__ "MatSetOption_SeqBAIJ" /* ADIC Ignore */ 837 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 838 { 839 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 840 if (op == MAT_ROW_ORIENTED) a->roworiented = 1; 841 else if (op == MAT_COLUMN_ORIENTED) a->roworiented = 0; 842 else if (op == MAT_COLUMNS_SORTED) a->sorted = 1; 843 else if (op == MAT_COLUMNS_UNSORTED) a->sorted = 0; 844 else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 845 else if (op == MAT_NEW_NONZERO_LOCATION_ERROR) a->nonew = -1; 846 else if (op == MAT_NEW_NONZERO_ALLOCATION_ERROR) a->nonew = -2; 847 else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 848 else if (op == MAT_ROWS_SORTED || 849 op == MAT_ROWS_UNSORTED || 850 op == MAT_SYMMETRIC || 851 op == MAT_STRUCTURALLY_SYMMETRIC || 852 op == MAT_YES_NEW_DIAGONALS || 853 op == MAT_IGNORE_OFF_PROC_ENTRIES) 854 PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 855 else if (op == MAT_NO_NEW_DIAGONALS) 856 {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");} 857 else 858 {SETERRQ(PETSC_ERR_SUP,0,"unknown option");} 859 return 0; 860 } 861 862 863 /* -------------------------------------------------------*/ 864 /* Should check that shapes of vectors and matrices match */ 865 /* -------------------------------------------------------*/ 866 #include "pinclude/plapack.h" 867 868 #undef __FUNC__ 869 #define __FUNC__ "MatMult_SeqBAIJ_1" 870 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz) 871 { 872 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 873 register Scalar *x,*z,*v,sum; 874 int mbs=a->mbs,i,*idx,*ii,n; 875 876 VecGetArray_Fast(xx,x); 877 VecGetArray_Fast(zz,z); 878 879 idx = a->j; 880 v = a->a; 881 ii = a->i; 882 883 for ( i=0; i<mbs; i++ ) { 884 n = ii[1] - ii[0]; ii++; 885 sum = 0.0; 886 while (n--) sum += *v++ * x[*idx++]; 887 z[i] = sum; 888 } 889 VecRestoreArray_Fast(xx,x); 890 VecRestoreArray_Fast(zz,z); 891 PLogFlops(2*a->nz - a->m); 892 return 0; 893 } 894 895 #undef __FUNC__ 896 #define __FUNC__ "MatMult_SeqBAIJ_2" 897 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz) 898 { 899 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 900 register Scalar *x,*z,*v,*xb,sum1,sum2; 901 register Scalar x1,x2; 902 int mbs=a->mbs,i,*idx,*ii; 903 int j,n; 904 905 VecGetArray_Fast(xx,x); 906 VecGetArray_Fast(zz,z); 907 908 idx = a->j; 909 v = a->a; 910 ii = a->i; 911 912 for ( i=0; i<mbs; i++ ) { 913 n = ii[1] - ii[0]; ii++; 914 sum1 = 0.0; sum2 = 0.0; 915 for ( j=0; j<n; j++ ) { 916 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 917 sum1 += v[0]*x1 + v[2]*x2; 918 sum2 += v[1]*x1 + v[3]*x2; 919 v += 4; 920 } 921 z[0] = sum1; z[1] = sum2; 922 z += 2; 923 } 924 VecRestoreArray_Fast(xx,x); 925 VecRestoreArray_Fast(zz,z); 926 PLogFlops(4*a->nz - a->m); 927 return 0; 928 } 929 930 #undef __FUNC__ 931 #define __FUNC__ "MatMult_SeqBAIJ_3" 932 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz) 933 { 934 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 935 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 936 int mbs=a->mbs,i,*idx,*ii,j,n; 937 938 VecGetArray_Fast(xx,x); 939 VecGetArray_Fast(zz,z); 940 941 idx = a->j; 942 v = a->a; 943 ii = a->i; 944 945 for ( i=0; i<mbs; i++ ) { 946 n = ii[1] - ii[0]; ii++; 947 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 948 for ( j=0; j<n; j++ ) { 949 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 950 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 951 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 952 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 953 v += 9; 954 } 955 z[0] = sum1; z[1] = sum2; z[2] = sum3; 956 z += 3; 957 } 958 VecRestoreArray_Fast(xx,x); 959 VecRestoreArray_Fast(zz,z); 960 PLogFlops(18*a->nz - a->m); 961 return 0; 962 } 963 964 #undef __FUNC__ 965 #define __FUNC__ "MatMult_SeqBAIJ_4" 966 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz) 967 { 968 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 969 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4; 970 register Scalar x1,x2,x3,x4; 971 int mbs=a->mbs,i,*idx,*ii; 972 int j,n; 973 974 VecGetArray_Fast(xx,x); 975 VecGetArray_Fast(zz,z); 976 977 idx = a->j; 978 v = a->a; 979 ii = a->i; 980 981 for ( i=0; i<mbs; i++ ) { 982 n = ii[1] - ii[0]; ii++; 983 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 984 for ( j=0; j<n; j++ ) { 985 xb = x + 4*(*idx++); 986 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 987 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 988 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 989 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 990 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 991 v += 16; 992 } 993 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 994 z += 4; 995 } 996 VecRestoreArray_Fast(xx,x); 997 VecRestoreArray_Fast(zz,z); 998 PLogFlops(32*a->nz - a->m); 999 return 0; 1000 } 1001 1002 #undef __FUNC__ 1003 #define __FUNC__ "MatMult_SeqBAIJ_5" 1004 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz) 1005 { 1006 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1007 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 1008 register Scalar x1,x2,x3,x4,x5; 1009 int mbs=a->mbs,i,*idx,*ii,j,n; 1010 1011 VecGetArray_Fast(xx,x); 1012 VecGetArray_Fast(zz,z); 1013 1014 idx = a->j; 1015 v = a->a; 1016 ii = a->i; 1017 1018 for ( i=0; i<mbs; i++ ) { 1019 n = ii[1] - ii[0]; ii++; 1020 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 1021 for ( j=0; j<n; j++ ) { 1022 xb = x + 5*(*idx++); 1023 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 1024 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1025 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1026 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1027 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1028 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 1029 v += 25; 1030 } 1031 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 1032 z += 5; 1033 } 1034 VecRestoreArray_Fast(xx,x); 1035 VecRestoreArray_Fast(zz,z); 1036 PLogFlops(50*a->nz - a->m); 1037 return 0; 1038 } 1039 1040 #undef __FUNC__ 1041 #define __FUNC__ "MatMult_SeqBAIJ_7" 1042 static int MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz) 1043 { 1044 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1045 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 1046 register Scalar x1,x2,x3,x4,x5,x6,x7; 1047 int mbs=a->mbs,i,*idx,*ii,j,n; 1048 1049 VecGetArray_Fast(xx,x); 1050 VecGetArray_Fast(zz,z); 1051 1052 idx = a->j; 1053 v = a->a; 1054 ii = a->i; 1055 1056 for ( i=0; i<mbs; i++ ) { 1057 n = ii[1] - ii[0]; ii++; 1058 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0; 1059 for ( j=0; j<n; j++ ) { 1060 xb = x + 7*(*idx++); 1061 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 1062 sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 1063 sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 1064 sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 1065 sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 1066 sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 1067 sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 1068 sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 1069 v += 49; 1070 } 1071 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 1072 z += 7; 1073 } 1074 1075 VecRestoreArray_Fast(xx,x); 1076 VecRestoreArray_Fast(zz,z); 1077 PLogFlops(98*a->nz - a->m); 1078 return 0; 1079 } 1080 1081 #undef __FUNC__ 1082 #define __FUNC__ "MatMult_SeqBAIJ_N" 1083 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz) 1084 { 1085 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1086 register Scalar *x,*z,*v,*xb; 1087 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2; 1088 int _One = 1,ncols,k; 1089 Scalar _DOne = 1.0,*work,*workt,_DZero = 0.0; 1090 1091 VecGetArray_Fast(xx,x); 1092 VecGetArray_Fast(zz,z); 1093 1094 idx = a->j; 1095 v = a->a; 1096 ii = a->i; 1097 1098 1099 if (!a->mult_work) { 1100 k = PetscMax(a->m,a->n); 1101 a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work); 1102 } 1103 work = a->mult_work; 1104 for ( i=0; i<mbs; i++ ) { 1105 n = ii[1] - ii[0]; ii++; 1106 ncols = n*bs; 1107 workt = work; 1108 for ( j=0; j<n; j++ ) { 1109 xb = x + bs*(*idx++); 1110 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1111 workt += bs; 1112 } 1113 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); 1114 v += n*bs2; 1115 z += bs; 1116 } 1117 VecRestoreArray_Fast(xx,x); 1118 VecRestoreArray_Fast(zz,z); 1119 PLogFlops(2*a->nz*bs2 - a->m); 1120 return 0; 1121 } 1122 1123 #undef __FUNC__ 1124 #define __FUNC__ "MatMultAdd_SeqBAIJ_1" 1125 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 1126 { 1127 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1128 register Scalar *x,*y,*z,*v,sum; 1129 int mbs=a->mbs,i,*idx,*ii,n; 1130 1131 VecGetArray_Fast(xx,x); 1132 VecGetArray_Fast(yy,y); 1133 VecGetArray_Fast(zz,z); 1134 1135 idx = a->j; 1136 v = a->a; 1137 ii = a->i; 1138 1139 for ( i=0; i<mbs; i++ ) { 1140 n = ii[1] - ii[0]; ii++; 1141 sum = y[i]; 1142 while (n--) sum += *v++ * x[*idx++]; 1143 z[i] = sum; 1144 } 1145 VecRestoreArray_Fast(xx,x); 1146 VecRestoreArray_Fast(yy,y); 1147 VecRestoreArray_Fast(zz,z); 1148 PLogFlops(2*a->nz); 1149 return 0; 1150 } 1151 1152 #undef __FUNC__ 1153 #define __FUNC__ "MatMultAdd_SeqBAIJ_2" 1154 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 1155 { 1156 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1157 register Scalar *x,*y,*z,*v,*xb,sum1,sum2; 1158 register Scalar x1,x2; 1159 int mbs=a->mbs,i,*idx,*ii; 1160 int j,n; 1161 1162 VecGetArray_Fast(xx,x); 1163 VecGetArray_Fast(yy,y); 1164 VecGetArray_Fast(zz,z); 1165 1166 idx = a->j; 1167 v = a->a; 1168 ii = a->i; 1169 1170 for ( i=0; i<mbs; i++ ) { 1171 n = ii[1] - ii[0]; ii++; 1172 sum1 = y[0]; sum2 = y[1]; 1173 for ( j=0; j<n; j++ ) { 1174 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 1175 sum1 += v[0]*x1 + v[2]*x2; 1176 sum2 += v[1]*x1 + v[3]*x2; 1177 v += 4; 1178 } 1179 z[0] = sum1; z[1] = sum2; 1180 z += 2; y += 2; 1181 } 1182 VecRestoreArray_Fast(xx,x); 1183 VecRestoreArray_Fast(yy,y); 1184 VecRestoreArray_Fast(zz,z); 1185 PLogFlops(4*a->nz); 1186 return 0; 1187 } 1188 1189 #undef __FUNC__ 1190 #define __FUNC__ "MatMultAdd_SeqBAIJ_3" 1191 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 1192 { 1193 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1194 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 1195 int mbs=a->mbs,i,*idx,*ii,j,n; 1196 1197 VecGetArray_Fast(xx,x); 1198 VecGetArray_Fast(yy,y); 1199 VecGetArray_Fast(zz,z); 1200 1201 idx = a->j; 1202 v = a->a; 1203 ii = a->i; 1204 1205 for ( i=0; i<mbs; i++ ) { 1206 n = ii[1] - ii[0]; ii++; 1207 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; 1208 for ( j=0; j<n; j++ ) { 1209 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1210 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 1211 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 1212 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 1213 v += 9; 1214 } 1215 z[0] = sum1; z[1] = sum2; z[2] = sum3; 1216 z += 3; y += 3; 1217 } 1218 VecRestoreArray_Fast(xx,x); 1219 VecRestoreArray_Fast(yy,y); 1220 VecRestoreArray_Fast(zz,z); 1221 PLogFlops(18*a->nz); 1222 return 0; 1223 } 1224 1225 #undef __FUNC__ 1226 #define __FUNC__ "MatMultAdd_SeqBAIJ_4" 1227 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 1228 { 1229 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1230 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4; 1231 register Scalar x1,x2,x3,x4; 1232 int mbs=a->mbs,i,*idx,*ii; 1233 int j,n; 1234 1235 VecGetArray_Fast(xx,x); 1236 VecGetArray_Fast(yy,y); 1237 VecGetArray_Fast(zz,z); 1238 1239 idx = a->j; 1240 v = a->a; 1241 ii = a->i; 1242 1243 for ( i=0; i<mbs; i++ ) { 1244 n = ii[1] - ii[0]; ii++; 1245 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; 1246 for ( j=0; j<n; j++ ) { 1247 xb = x + 4*(*idx++); 1248 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1249 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 1250 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 1251 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 1252 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 1253 v += 16; 1254 } 1255 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 1256 z += 4; y += 4; 1257 } 1258 VecRestoreArray_Fast(xx,x); 1259 VecRestoreArray_Fast(yy,y); 1260 VecRestoreArray_Fast(zz,z); 1261 PLogFlops(32*a->nz); 1262 return 0; 1263 } 1264 1265 #undef __FUNC__ 1266 #define __FUNC__ "MatMultAdd_SeqBAIJ_5" 1267 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 1268 { 1269 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1270 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 1271 register Scalar x1,x2,x3,x4,x5; 1272 int mbs=a->mbs,i,*idx,*ii,j,n; 1273 1274 VecGetArray_Fast(xx,x); 1275 VecGetArray_Fast(yy,y); 1276 VecGetArray_Fast(zz,z); 1277 1278 idx = a->j; 1279 v = a->a; 1280 ii = a->i; 1281 1282 for ( i=0; i<mbs; i++ ) { 1283 n = ii[1] - ii[0]; ii++; 1284 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; 1285 for ( j=0; j<n; j++ ) { 1286 xb = x + 5*(*idx++); 1287 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 1288 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1289 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1290 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1291 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1292 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 1293 v += 25; 1294 } 1295 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 1296 z += 5; y += 5; 1297 } 1298 VecRestoreArray_Fast(xx,x); 1299 VecRestoreArray_Fast(yy,y); 1300 VecRestoreArray_Fast(zz,z); 1301 PLogFlops(50*a->nz); 1302 return 0; 1303 } 1304 1305 #undef __FUNC__ 1306 #define __FUNC__ "MatMultAdd_SeqBAIJ_7" 1307 static int MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1308 { 1309 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1310 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 1311 register Scalar x1,x2,x3,x4,x5,x6,x7; 1312 int mbs=a->mbs,i,*idx,*ii,j,n; 1313 1314 VecGetArray_Fast(xx,x); 1315 VecGetArray_Fast(yy,y); 1316 VecGetArray_Fast(zz,z); 1317 1318 idx = a->j; 1319 v = a->a; 1320 ii = a->i; 1321 1322 for ( i=0; i<mbs; i++ ) { 1323 n = ii[1] - ii[0]; ii++; 1324 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6]; 1325 for ( j=0; j<n; j++ ) { 1326 xb = x + 7*(*idx++); 1327 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 1328 sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 1329 sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 1330 sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 1331 sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 1332 sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 1333 sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 1334 sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 1335 v += 49; 1336 } 1337 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 1338 z += 7; y += 7; 1339 } 1340 VecRestoreArray_Fast(xx,x); 1341 VecRestoreArray_Fast(yy,y); 1342 VecRestoreArray_Fast(zz,z); 1343 PLogFlops(98*a->nz); 1344 return 0; 1345 } 1346 1347 1348 #undef __FUNC__ 1349 #define __FUNC__ "MatMultAdd_SeqBAIJ_N" 1350 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 1351 { 1352 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1353 register Scalar *x,*z,*v,*xb; 1354 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr; 1355 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1356 1357 if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1358 1359 VecGetArray_Fast(xx,x); 1360 VecGetArray_Fast(zz,z); 1361 1362 idx = a->j; 1363 v = a->a; 1364 ii = a->i; 1365 1366 1367 if (!a->mult_work) { 1368 k = PetscMax(a->m,a->n); 1369 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work); 1370 } 1371 work = a->mult_work; 1372 for ( i=0; i<mbs; i++ ) { 1373 n = ii[1] - ii[0]; ii++; 1374 ncols = n*bs; 1375 workt = work; 1376 for ( j=0; j<n; j++ ) { 1377 xb = x + bs*(*idx++); 1378 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1379 workt += bs; 1380 } 1381 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); 1382 v += n*bs2; 1383 z += bs; 1384 } 1385 VecRestoreArray_Fast(xx,x); 1386 VecRestoreArray_Fast(zz,z); 1387 PLogFlops(2*a->nz*bs2 ); 1388 return 0; 1389 } 1390 1391 #undef __FUNC__ 1392 #define __FUNC__ "MatMultTrans_SeqBAIJ" 1393 int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz) 1394 { 1395 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1396 Scalar *xg,*zg,*zb; 1397 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1398 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1399 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1400 1401 1402 VecGetArray_Fast(xx,xg); x = xg; 1403 VecGetArray_Fast(zz,zg); z = zg; 1404 PetscMemzero(z,N*sizeof(Scalar)); 1405 1406 idx = a->j; 1407 v = a->a; 1408 ii = a->i; 1409 1410 switch (bs) { 1411 case 1: 1412 for ( i=0; i<mbs; i++ ) { 1413 n = ii[1] - ii[0]; ii++; 1414 xb = x + i; x1 = xb[0]; 1415 ib = idx + ai[i]; 1416 for ( j=0; j<n; j++ ) { 1417 rval = ib[j]; 1418 z[rval] += *v++ * x1; 1419 } 1420 } 1421 break; 1422 case 2: 1423 for ( i=0; i<mbs; i++ ) { 1424 n = ii[1] - ii[0]; ii++; 1425 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1426 ib = idx + ai[i]; 1427 for ( j=0; j<n; j++ ) { 1428 rval = ib[j]*2; 1429 z[rval++] += v[0]*x1 + v[1]*x2; 1430 z[rval++] += v[2]*x1 + v[3]*x2; 1431 v += 4; 1432 } 1433 } 1434 break; 1435 case 3: 1436 for ( i=0; i<mbs; i++ ) { 1437 n = ii[1] - ii[0]; ii++; 1438 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1439 ib = idx + ai[i]; 1440 for ( j=0; j<n; j++ ) { 1441 rval = ib[j]*3; 1442 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1443 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1444 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1445 v += 9; 1446 } 1447 } 1448 break; 1449 case 4: 1450 for ( i=0; i<mbs; i++ ) { 1451 n = ii[1] - ii[0]; ii++; 1452 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1453 ib = idx + ai[i]; 1454 for ( j=0; j<n; j++ ) { 1455 rval = ib[j]*4; 1456 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1457 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1458 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1459 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1460 v += 16; 1461 } 1462 } 1463 break; 1464 case 5: 1465 for ( i=0; i<mbs; i++ ) { 1466 n = ii[1] - ii[0]; ii++; 1467 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1468 x4 = xb[3]; x5 = xb[4]; 1469 ib = idx + ai[i]; 1470 for ( j=0; j<n; j++ ) { 1471 rval = ib[j]*5; 1472 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1473 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1474 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1475 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1476 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1477 v += 25; 1478 } 1479 } 1480 break; 1481 /* block sizes larger then 5 by 5 are handled by BLAS */ 1482 default: { 1483 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1484 if (!a->mult_work) { 1485 k = PetscMax(a->m,a->n); 1486 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1487 CHKPTRQ(a->mult_work); 1488 } 1489 work = a->mult_work; 1490 for ( i=0; i<mbs; i++ ) { 1491 n = ii[1] - ii[0]; ii++; 1492 ncols = n*bs; 1493 PetscMemzero(work,ncols*sizeof(Scalar)); 1494 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1495 v += n*bs2; 1496 x += bs; 1497 workt = work; 1498 for ( j=0; j<n; j++ ) { 1499 zb = z + bs*(*idx++); 1500 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1501 workt += bs; 1502 } 1503 } 1504 } 1505 } 1506 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1507 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1508 PLogFlops(2*a->nz*a->bs2 - a->n); 1509 return 0; 1510 } 1511 1512 #undef __FUNC__ 1513 #define __FUNC__ "MatMultTransAdd_SeqBAIJ" 1514 int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1515 { 1516 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1517 Scalar *xg,*zg,*zb; 1518 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1519 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1520 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1521 1522 1523 1524 VecGetArray_Fast(xx,xg); x = xg; 1525 VecGetArray_Fast(zz,zg); z = zg; 1526 1527 if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1528 else PetscMemzero(z,N*sizeof(Scalar)); 1529 1530 1531 idx = a->j; 1532 v = a->a; 1533 ii = a->i; 1534 1535 switch (bs) { 1536 case 1: 1537 for ( i=0; i<mbs; i++ ) { 1538 n = ii[1] - ii[0]; ii++; 1539 xb = x + i; x1 = xb[0]; 1540 ib = idx + ai[i]; 1541 for ( j=0; j<n; j++ ) { 1542 rval = ib[j]; 1543 z[rval] += *v++ * x1; 1544 } 1545 } 1546 break; 1547 case 2: 1548 for ( i=0; i<mbs; i++ ) { 1549 n = ii[1] - ii[0]; ii++; 1550 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1551 ib = idx + ai[i]; 1552 for ( j=0; j<n; j++ ) { 1553 rval = ib[j]*2; 1554 z[rval++] += v[0]*x1 + v[1]*x2; 1555 z[rval++] += v[2]*x1 + v[3]*x2; 1556 v += 4; 1557 } 1558 } 1559 break; 1560 case 3: 1561 for ( i=0; i<mbs; i++ ) { 1562 n = ii[1] - ii[0]; ii++; 1563 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1564 ib = idx + ai[i]; 1565 for ( j=0; j<n; j++ ) { 1566 rval = ib[j]*3; 1567 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1568 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1569 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1570 v += 9; 1571 } 1572 } 1573 break; 1574 case 4: 1575 for ( i=0; i<mbs; i++ ) { 1576 n = ii[1] - ii[0]; ii++; 1577 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1578 ib = idx + ai[i]; 1579 for ( j=0; j<n; j++ ) { 1580 rval = ib[j]*4; 1581 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1582 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1583 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1584 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1585 v += 16; 1586 } 1587 } 1588 break; 1589 case 5: 1590 for ( i=0; i<mbs; i++ ) { 1591 n = ii[1] - ii[0]; ii++; 1592 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1593 x4 = xb[3]; x5 = xb[4]; 1594 ib = idx + ai[i]; 1595 for ( j=0; j<n; j++ ) { 1596 rval = ib[j]*5; 1597 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1598 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1599 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1600 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1601 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1602 v += 25; 1603 } 1604 } 1605 break; 1606 /* block sizes larger then 5 by 5 are handled by BLAS */ 1607 default: { 1608 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1609 if (!a->mult_work) { 1610 k = PetscMax(a->m,a->n); 1611 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1612 CHKPTRQ(a->mult_work); 1613 } 1614 work = a->mult_work; 1615 for ( i=0; i<mbs; i++ ) { 1616 n = ii[1] - ii[0]; ii++; 1617 ncols = n*bs; 1618 PetscMemzero(work,ncols*sizeof(Scalar)); 1619 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1620 v += n*bs2; 1621 x += bs; 1622 workt = work; 1623 for ( j=0; j<n; j++ ) { 1624 zb = z + bs*(*idx++); 1625 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1626 workt += bs; 1627 } 1628 } 1629 } 1630 } 1631 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1632 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1633 PLogFlops(2*a->nz*a->bs2); 1634 return 0; 1635 } 1636 1637 #undef __FUNC__ 1638 #define __FUNC__ "MatGetInfo_SeqBAIJ" /* ADIC Ignore */ 1639 int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info) 1640 { 1641 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1642 1643 info->rows_global = (double)a->m; 1644 info->columns_global = (double)a->n; 1645 info->rows_local = (double)a->m; 1646 info->columns_local = (double)a->n; 1647 info->block_size = a->bs2; 1648 info->nz_allocated = a->maxnz; 1649 info->nz_used = a->bs2*a->nz; 1650 info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 1651 /* if (info->nz_unneeded != A->info.nz_unneeded) 1652 printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */ 1653 info->assemblies = A->num_ass; 1654 info->mallocs = a->reallocs; 1655 info->memory = A->mem; 1656 if (A->factor) { 1657 info->fill_ratio_given = A->info.fill_ratio_given; 1658 info->fill_ratio_needed = A->info.fill_ratio_needed; 1659 info->factor_mallocs = A->info.factor_mallocs; 1660 } else { 1661 info->fill_ratio_given = 0; 1662 info->fill_ratio_needed = 0; 1663 info->factor_mallocs = 0; 1664 } 1665 return 0; 1666 } 1667 1668 #undef __FUNC__ 1669 #define __FUNC__ "MatEqual_SeqBAIJ" 1670 int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 1671 { 1672 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 1673 1674 if (B->type !=MATSEQBAIJ)SETERRQ(1,0,"Matrices must be same type"); 1675 1676 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1677 if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 1678 (a->nz != b->nz)) { 1679 *flg = PETSC_FALSE; return 0; 1680 } 1681 1682 /* if the a->i are the same */ 1683 if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 1684 *flg = PETSC_FALSE; return 0; 1685 } 1686 1687 /* if a->j are the same */ 1688 if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 1689 *flg = PETSC_FALSE; return 0; 1690 } 1691 1692 /* if a->a are the same */ 1693 if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 1694 *flg = PETSC_FALSE; return 0; 1695 } 1696 *flg = PETSC_TRUE; 1697 return 0; 1698 1699 } 1700 1701 #undef __FUNC__ 1702 #define __FUNC__ "MatGetDiagonal_SeqBAIJ" 1703 int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 1704 { 1705 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1706 int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 1707 Scalar *x, zero = 0.0,*aa,*aa_j; 1708 1709 if (A->factor) SETERRQ(1,0,"Not for factored matrix"); 1710 bs = a->bs; 1711 aa = a->a; 1712 ai = a->i; 1713 aj = a->j; 1714 ambs = a->mbs; 1715 bs2 = a->bs2; 1716 1717 VecSet(&zero,v); 1718 VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n); 1719 if (n != a->m) SETERRQ(1,0,"Nonconforming matrix and vector"); 1720 for ( i=0; i<ambs; i++ ) { 1721 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1722 if (aj[j] == i) { 1723 row = i*bs; 1724 aa_j = aa+j*bs2; 1725 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 1726 break; 1727 } 1728 } 1729 } 1730 return 0; 1731 } 1732 1733 #undef __FUNC__ 1734 #define __FUNC__ "MatDiagonalScale_SeqBAIJ" 1735 int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 1736 { 1737 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1738 Scalar *l,*r,x,*v,*aa,*li,*ri; 1739 int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 1740 1741 ai = a->i; 1742 aj = a->j; 1743 aa = a->a; 1744 m = a->m; 1745 n = a->n; 1746 bs = a->bs; 1747 mbs = a->mbs; 1748 bs2 = a->bs2; 1749 if (ll) { 1750 VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm); 1751 if (lm != m) SETERRQ(1,0,"Left scaling vector wrong length"); 1752 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1753 M = ai[i+1] - ai[i]; 1754 li = l + i*bs; 1755 v = aa + bs2*ai[i]; 1756 for ( j=0; j<M; j++ ) { /* for each block */ 1757 for ( k=0; k<bs2; k++ ) { 1758 (*v++) *= li[k%bs]; 1759 } 1760 } 1761 } 1762 } 1763 1764 if (rr) { 1765 VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn); 1766 if (rn != n) SETERRQ(1,0,"Right scaling vector wrong length"); 1767 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1768 M = ai[i+1] - ai[i]; 1769 v = aa + bs2*ai[i]; 1770 for ( j=0; j<M; j++ ) { /* for each block */ 1771 ri = r + bs*aj[ai[i]+j]; 1772 for ( k=0; k<bs; k++ ) { 1773 x = ri[k]; 1774 for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 1775 } 1776 } 1777 } 1778 } 1779 return 0; 1780 } 1781 1782 1783 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 1784 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 1785 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1786 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*); 1787 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**); 1788 1789 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 1790 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 1791 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 1792 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 1793 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 1794 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 1795 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec); 1796 1797 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 1798 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 1799 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 1800 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 1801 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 1802 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 1803 1804 #undef __FUNC__ 1805 #define __FUNC__ "MatNorm_SeqBAIJ" 1806 int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 1807 { 1808 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1809 Scalar *v = a->a; 1810 double sum = 0.0; 1811 int i,nz=a->nz,bs2=a->bs2; 1812 1813 if (type == NORM_FROBENIUS) { 1814 for (i=0; i< bs2*nz; i++ ) { 1815 #if defined(PETSC_COMPLEX) 1816 sum += real(conj(*v)*(*v)); v++; 1817 #else 1818 sum += (*v)*(*v); v++; 1819 #endif 1820 } 1821 *norm = sqrt(sum); 1822 } 1823 else { 1824 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 1825 } 1826 return 0; 1827 } 1828 1829 extern int MatSolve_SeqBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 1830 /* 1831 note: This can only work for identity for row and col. It would 1832 be good to check this and otherwise generate an error. 1833 */ 1834 #undef __FUNC__ 1835 #define __FUNC__ "MatILUFactor_SeqBAIJ" 1836 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 1837 { 1838 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1839 Mat outA; 1840 int ierr; 1841 1842 if (fill != 0) SETERRQ(1,0,"Only fill=0 supported"); 1843 1844 outA = inA; 1845 inA->factor = FACTOR_LU; 1846 a->row = row; 1847 a->col = col; 1848 1849 if (!a->solve_work) { 1850 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1851 PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar)); 1852 } 1853 1854 if (!a->diag) { 1855 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 1856 } 1857 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 1858 1859 /* 1860 Blocksize 4 has a special faster solver for ILU(0) factorization 1861 with natural ordering 1862 */ 1863 if (a->bs == 4) { 1864 inA->ops.solve = MatSolve_SeqBAIJ_4_NaturalOrdering; 1865 } 1866 1867 return 0; 1868 } 1869 1870 #undef __FUNC__ 1871 #define __FUNC__ "MatScale_SeqBAIJ" 1872 int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 1873 { 1874 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1875 int one = 1, totalnz = a->bs2*a->nz; 1876 BLscal_( &totalnz, alpha, a->a, &one ); 1877 PLogFlops(totalnz); 1878 return 0; 1879 } 1880 1881 #undef __FUNC__ 1882 #define __FUNC__ "MatGetValues_SeqBAIJ" 1883 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 1884 { 1885 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1886 int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 1887 int *ai = a->i, *ailen = a->ilen; 1888 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 1889 Scalar *ap, *aa = a->a, zero = 0.0; 1890 1891 for ( k=0; k<m; k++ ) { /* loop over rows */ 1892 row = im[k]; brow = row/bs; 1893 if (row < 0) SETERRQ(1,0,"Negative row"); 1894 if (row >= a->m) SETERRQ(1,0,"Row too large"); 1895 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1896 nrow = ailen[brow]; 1897 for ( l=0; l<n; l++ ) { /* loop over columns */ 1898 if (in[l] < 0) SETERRQ(1,0,"Negative column"); 1899 if (in[l] >= a->n) SETERRQ(1,0,"Column too large"); 1900 col = in[l] ; 1901 bcol = col/bs; 1902 cidx = col%bs; 1903 ridx = row%bs; 1904 high = nrow; 1905 low = 0; /* assume unsorted */ 1906 while (high-low > 5) { 1907 t = (low+high)/2; 1908 if (rp[t] > bcol) high = t; 1909 else low = t; 1910 } 1911 for ( i=low; i<high; i++ ) { 1912 if (rp[i] > bcol) break; 1913 if (rp[i] == bcol) { 1914 *v++ = ap[bs2*i+bs*cidx+ridx]; 1915 goto finished; 1916 } 1917 } 1918 *v++ = zero; 1919 finished:; 1920 } 1921 } 1922 return 0; 1923 } 1924 1925 #undef __FUNC__ 1926 #define __FUNC__ "MatGetBlockSize_SeqBAIJ" /* ADIC Ignore */ 1927 int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs) 1928 { 1929 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data; 1930 *bs = baij->bs; 1931 return 0; 1932 } 1933 1934 /* idx should be of length atlease bs */ 1935 #undef __FUNC__ 1936 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Block" 1937 static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg) 1938 { 1939 int i,row; 1940 row = idx[0]; 1941 if (row%bs!=0) { *flg = PETSC_FALSE; return 0; } 1942 1943 for ( i=1; i<bs; i++ ) { 1944 if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; } 1945 } 1946 *flg = PETSC_TRUE; 1947 return 0; 1948 } 1949 1950 #undef __FUNC__ 1951 #define __FUNC__ "MatZeroRows_SeqBAIJ" 1952 int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag) 1953 { 1954 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1955 IS is_local; 1956 int ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2; 1957 PetscTruth flg; 1958 Scalar zero = 0.0,*aa; 1959 1960 /* Make a copy of the IS and sort it */ 1961 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 1962 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1963 ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr); 1964 ierr = ISSort(is_local); CHKERRQ(ierr); 1965 ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr); 1966 1967 i = 0; 1968 while (i < is_n) { 1969 if (rows[i]<0 || rows[i]>m) SETERRQ(1,0,"row out of range"); 1970 flg = PETSC_FALSE; 1971 if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); } 1972 if (flg) { /* There exists a block of rows to be Zerowed */ 1973 baij->ilen[rows[i]/bs] = 0; 1974 i += bs; 1975 } else { /* Zero out only the requested row */ 1976 count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs; 1977 aa = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs); 1978 for ( j=0; j<count; j++ ) { 1979 aa[0] = zero; 1980 aa+=bs; 1981 } 1982 i++; 1983 } 1984 } 1985 if (diag) { 1986 for ( j=0; j<is_n; j++ ) { 1987 ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1988 } 1989 } 1990 ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr); 1991 ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr); 1992 ierr = ISDestroy(is_local); CHKERRQ(ierr); 1993 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1994 1995 return 0; 1996 } 1997 1998 #undef __FUNC__ 1999 #define __FUNC__ "MatPrintHelp_SeqBAIJ" /* ADIC Ignore */ 2000 int MatPrintHelp_SeqBAIJ(Mat A) 2001 { 2002 static int called = 0; 2003 MPI_Comm comm = A->comm; 2004 2005 if (called) return 0; else called = 1; 2006 PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n"); 2007 PetscPrintf(comm," -mat_block_size <block_size>\n"); 2008 return 0; 2009 } 2010 2011 /* -------------------------------------------------------------------*/ 2012 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 2013 MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 2014 MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N, 2015 MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ, 2016 MatSolve_SeqBAIJ_N,0, 2017 0,0, 2018 MatLUFactor_SeqBAIJ,0, 2019 0, 2020 MatTranspose_SeqBAIJ, 2021 MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 2022 MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 2023 0,MatAssemblyEnd_SeqBAIJ, 2024 0, 2025 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ, 2026 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 2027 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 2028 MatILUFactorSymbolic_SeqBAIJ,0, 2029 0,0, 2030 MatConvertSameType_SeqBAIJ,0,0, 2031 MatILUFactor_SeqBAIJ,0,0, 2032 MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ, 2033 MatGetValues_SeqBAIJ,0, 2034 MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 2035 0,0,0,MatGetBlockSize_SeqBAIJ, 2036 MatGetRowIJ_SeqBAIJ, 2037 MatRestoreRowIJ_SeqBAIJ, 2038 0,0,0,0,0,0, 2039 MatSetValuesBlocked_SeqBAIJ}; 2040 2041 #undef __FUNC__ 2042 #define __FUNC__ "MatCreateSeqBAIJ" 2043 /*@C 2044 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2045 compressed row) format. For good matrix assembly performance the 2046 user should preallocate the matrix storage by setting the parameter nz 2047 (or the array nzz). By setting these parameters accurately, performance 2048 during matrix assembly can be increased by more than a factor of 50. 2049 2050 Input Parameters: 2051 . comm - MPI communicator, set to PETSC_COMM_SELF 2052 . bs - size of block 2053 . m - number of rows 2054 . n - number of columns 2055 . nz - number of block nonzeros per block row (same for all rows) 2056 . nzz - array containing the number of block nonzeros in the various block rows 2057 (possibly different for each block row) or PETSC_NULL 2058 2059 Output Parameter: 2060 . A - the matrix 2061 2062 Options Database Keys: 2063 $ -mat_no_unroll - uses code that does not unroll the loops in the 2064 $ block calculations (much solver) 2065 $ -mat_block_size - size of the blocks to use 2066 2067 Notes: 2068 The block AIJ format is fully compatible with standard Fortran 77 2069 storage. That is, the stored row and column indices can begin at 2070 either one (as in Fortran) or zero. See the users' manual for details. 2071 2072 Specify the preallocated storage with either nz or nnz (not both). 2073 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2074 allocation. For additional details, see the users manual chapter on 2075 matrices. 2076 2077 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2078 @*/ 2079 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 2080 { 2081 Mat B; 2082 Mat_SeqBAIJ *b; 2083 int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size; 2084 2085 MPI_Comm_size(comm,&size); 2086 if (size > 1) SETERRQ(1,0,"Comm must be of size 1"); 2087 2088 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr); 2089 2090 if (mbs*bs!=m || nbs*bs!=n) 2091 SETERRQ(1,0,"Number rows, cols must be divisible by blocksize"); 2092 2093 *A = 0; 2094 PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 2095 PLogObjectCreate(B); 2096 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 2097 PetscMemzero(b,sizeof(Mat_SeqBAIJ)); 2098 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 2099 ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr); 2100 if (!flg) { 2101 switch (bs) { 2102 case 1: 2103 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2104 B->ops.solve = MatSolve_SeqBAIJ_1; 2105 B->ops.mult = MatMult_SeqBAIJ_1; 2106 B->ops.multadd = MatMultAdd_SeqBAIJ_1; 2107 break; 2108 case 2: 2109 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2110 B->ops.solve = MatSolve_SeqBAIJ_2; 2111 B->ops.mult = MatMult_SeqBAIJ_2; 2112 B->ops.multadd = MatMultAdd_SeqBAIJ_2; 2113 break; 2114 case 3: 2115 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2116 B->ops.solve = MatSolve_SeqBAIJ_3; 2117 B->ops.mult = MatMult_SeqBAIJ_3; 2118 B->ops.multadd = MatMultAdd_SeqBAIJ_3; 2119 break; 2120 case 4: 2121 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2122 B->ops.solve = MatSolve_SeqBAIJ_4; 2123 B->ops.mult = MatMult_SeqBAIJ_4; 2124 B->ops.multadd = MatMultAdd_SeqBAIJ_4; 2125 break; 2126 case 5: 2127 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2128 B->ops.solve = MatSolve_SeqBAIJ_5; 2129 B->ops.mult = MatMult_SeqBAIJ_5; 2130 B->ops.multadd = MatMultAdd_SeqBAIJ_5; 2131 break; 2132 case 7: 2133 B->ops.mult = MatMult_SeqBAIJ_7; 2134 B->ops.solve = MatSolve_SeqBAIJ_7; 2135 B->ops.multadd = MatMultAdd_SeqBAIJ_7; 2136 break; 2137 } 2138 } 2139 B->destroy = MatDestroy_SeqBAIJ; 2140 B->view = MatView_SeqBAIJ; 2141 B->factor = 0; 2142 B->lupivotthreshold = 1.0; 2143 B->mapping = 0; 2144 b->row = 0; 2145 b->col = 0; 2146 b->reallocs = 0; 2147 2148 b->m = m; B->m = m; B->M = m; 2149 b->n = n; B->n = n; B->N = n; 2150 b->mbs = mbs; 2151 b->nbs = nbs; 2152 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 2153 if (nnz == PETSC_NULL) { 2154 if (nz == PETSC_DEFAULT) nz = 5; 2155 else if (nz <= 0) nz = 1; 2156 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 2157 nz = nz*mbs; 2158 } 2159 else { 2160 nz = 0; 2161 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 2162 } 2163 2164 /* allocate the matrix space */ 2165 len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 2166 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 2167 PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 2168 b->j = (int *) (b->a + nz*bs2); 2169 PetscMemzero(b->j,nz*sizeof(int)); 2170 b->i = b->j + nz; 2171 b->singlemalloc = 1; 2172 2173 b->i[0] = 0; 2174 for (i=1; i<mbs+1; i++) { 2175 b->i[i] = b->i[i-1] + b->imax[i-1]; 2176 } 2177 2178 /* b->ilen will count nonzeros in each block row so far. */ 2179 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 2180 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2181 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 2182 2183 b->bs = bs; 2184 b->bs2 = bs2; 2185 b->mbs = mbs; 2186 b->nz = 0; 2187 b->maxnz = nz*bs2; 2188 b->sorted = 0; 2189 b->roworiented = 1; 2190 b->nonew = 0; 2191 b->diag = 0; 2192 b->solve_work = 0; 2193 b->mult_work = 0; 2194 b->spptr = 0; 2195 B->info.nz_unneeded = (double)b->maxnz; 2196 2197 *A = B; 2198 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 2199 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 2200 return 0; 2201 } 2202 2203 #undef __FUNC__ 2204 #define __FUNC__ "MatConvertSameType_SeqBAIJ" 2205 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 2206 { 2207 Mat C; 2208 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 2209 int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 2210 2211 if (a->i[mbs] != nz) SETERRQ(1,0,"Corrupt matrix"); 2212 2213 *B = 0; 2214 PetscHeaderCreate(C,_p_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 2215 PLogObjectCreate(C); 2216 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 2217 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 2218 C->destroy = MatDestroy_SeqBAIJ; 2219 C->view = MatView_SeqBAIJ; 2220 C->factor = A->factor; 2221 c->row = 0; 2222 c->col = 0; 2223 C->assembled = PETSC_TRUE; 2224 2225 c->m = C->m = a->m; 2226 c->n = C->n = a->n; 2227 C->M = a->m; 2228 C->N = a->n; 2229 2230 c->bs = a->bs; 2231 c->bs2 = a->bs2; 2232 c->mbs = a->mbs; 2233 c->nbs = a->nbs; 2234 2235 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 2236 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 2237 for ( i=0; i<mbs; i++ ) { 2238 c->imax[i] = a->imax[i]; 2239 c->ilen[i] = a->ilen[i]; 2240 } 2241 2242 /* allocate the matrix space */ 2243 c->singlemalloc = 1; 2244 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 2245 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 2246 c->j = (int *) (c->a + nz*bs2); 2247 c->i = c->j + nz; 2248 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 2249 if (mbs > 0) { 2250 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 2251 if (cpvalues == COPY_VALUES) { 2252 PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 2253 } 2254 } 2255 2256 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2257 c->sorted = a->sorted; 2258 c->roworiented = a->roworiented; 2259 c->nonew = a->nonew; 2260 2261 if (a->diag) { 2262 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 2263 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 2264 for ( i=0; i<mbs; i++ ) { 2265 c->diag[i] = a->diag[i]; 2266 } 2267 } 2268 else c->diag = 0; 2269 c->nz = a->nz; 2270 c->maxnz = a->maxnz; 2271 c->solve_work = 0; 2272 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 2273 c->mult_work = 0; 2274 *B = C; 2275 return 0; 2276 } 2277 2278 #undef __FUNC__ 2279 #define __FUNC__ "MatLoad_SeqBAIJ" 2280 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 2281 { 2282 Mat_SeqBAIJ *a; 2283 Mat B; 2284 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 2285 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2286 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 2287 int *masked, nmask,tmp,bs2,ishift; 2288 Scalar *aa; 2289 MPI_Comm comm = ((PetscObject) viewer)->comm; 2290 2291 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2292 bs2 = bs*bs; 2293 2294 MPI_Comm_size(comm,&size); 2295 if (size > 1) SETERRQ(1,0,"view must have one processor"); 2296 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2297 ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 2298 if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not Mat object"); 2299 M = header[1]; N = header[2]; nz = header[3]; 2300 2301 if (M != N) SETERRQ(1,0,"Can only do square matrices"); 2302 2303 /* 2304 This code adds extra rows to make sure the number of rows is 2305 divisible by the blocksize 2306 */ 2307 mbs = M/bs; 2308 extra_rows = bs - M + bs*(mbs); 2309 if (extra_rows == bs) extra_rows = 0; 2310 else mbs++; 2311 if (extra_rows) { 2312 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 2313 } 2314 2315 /* read in row lengths */ 2316 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 2317 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 2318 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 2319 2320 /* read in column indices */ 2321 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 2322 ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 2323 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 2324 2325 /* loop over row lengths determining block row lengths */ 2326 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 2327 PetscMemzero(browlengths,mbs*sizeof(int)); 2328 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 2329 PetscMemzero(mask,mbs*sizeof(int)); 2330 masked = mask + mbs; 2331 rowcount = 0; nzcount = 0; 2332 for ( i=0; i<mbs; i++ ) { 2333 nmask = 0; 2334 for ( j=0; j<bs; j++ ) { 2335 kmax = rowlengths[rowcount]; 2336 for ( k=0; k<kmax; k++ ) { 2337 tmp = jj[nzcount++]/bs; 2338 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2339 } 2340 rowcount++; 2341 } 2342 browlengths[i] += nmask; 2343 /* zero out the mask elements we set */ 2344 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2345 } 2346 2347 /* create our matrix */ 2348 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 2349 B = *A; 2350 a = (Mat_SeqBAIJ *) B->data; 2351 2352 /* set matrix "i" values */ 2353 a->i[0] = 0; 2354 for ( i=1; i<= mbs; i++ ) { 2355 a->i[i] = a->i[i-1] + browlengths[i-1]; 2356 a->ilen[i-1] = browlengths[i-1]; 2357 } 2358 a->nz = 0; 2359 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 2360 2361 /* read in nonzero values */ 2362 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 2363 ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 2364 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 2365 2366 /* set "a" and "j" values into matrix */ 2367 nzcount = 0; jcount = 0; 2368 for ( i=0; i<mbs; i++ ) { 2369 nzcountb = nzcount; 2370 nmask = 0; 2371 for ( j=0; j<bs; j++ ) { 2372 kmax = rowlengths[i*bs+j]; 2373 for ( k=0; k<kmax; k++ ) { 2374 tmp = jj[nzcount++]/bs; 2375 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2376 } 2377 rowcount++; 2378 } 2379 /* sort the masked values */ 2380 PetscSortInt(nmask,masked); 2381 2382 /* set "j" values into matrix */ 2383 maskcount = 1; 2384 for ( j=0; j<nmask; j++ ) { 2385 a->j[jcount++] = masked[j]; 2386 mask[masked[j]] = maskcount++; 2387 } 2388 /* set "a" values into matrix */ 2389 ishift = bs2*a->i[i]; 2390 for ( j=0; j<bs; j++ ) { 2391 kmax = rowlengths[i*bs+j]; 2392 for ( k=0; k<kmax; k++ ) { 2393 tmp = jj[nzcountb]/bs ; 2394 block = mask[tmp] - 1; 2395 point = jj[nzcountb] - bs*tmp; 2396 idx = ishift + bs2*block + j + bs*point; 2397 a->a[idx] = aa[nzcountb++]; 2398 } 2399 } 2400 /* zero out the mask elements we set */ 2401 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2402 } 2403 if (jcount != a->nz) SETERRQ(1,0,"Error bad binary matrix"); 2404 2405 PetscFree(rowlengths); 2406 PetscFree(browlengths); 2407 PetscFree(aa); 2408 PetscFree(jj); 2409 PetscFree(mask); 2410 2411 B->assembled = PETSC_TRUE; 2412 2413 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 2414 if (flg) { 2415 Viewer tviewer; 2416 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2417 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO,0);CHKERRQ(ierr); 2418 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2419 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2420 } 2421 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 2422 if (flg) { 2423 Viewer tviewer; 2424 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2425 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO_LONG,0);CHKERRQ(ierr); 2426 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2427 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2428 } 2429 ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 2430 if (flg) { 2431 Viewer tviewer; 2432 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2433 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2434 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2435 } 2436 ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 2437 if (flg) { 2438 Viewer tviewer; 2439 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2440 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_MATLAB,"M");CHKERRQ(ierr); 2441 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2442 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2443 } 2444 ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 2445 if (flg) { 2446 Viewer tviewer; 2447 ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 2448 ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 2449 ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 2450 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2451 } 2452 return 0; 2453 } 2454 2455 2456 2457