1 #ifndef lint 2 static char vcid[] = "$Id: baij.c,v 1.104 1997/06/05 12:54:29 bsmith Exp curfman $"; 3 #endif 4 5 /* 6 Defines the basic matrix operations for the BAIJ (compressed row) 7 matrix storage format. 8 */ 9 10 #include "pinclude/pviewer.h" 11 #include "sys.h" 12 #include "src/mat/impls/baij/seq/baij.h" 13 #include "src/vec/vecimpl.h" 14 #include "src/inline/spops.h" 15 #include "petsc.h" 16 17 18 /* 19 Adds diagonal pointers to sparse matrix structure. 20 */ 21 22 #undef __FUNC__ 23 #define __FUNC__ "MatMarkDiag_SeqBAIJ" 24 int MatMarkDiag_SeqBAIJ(Mat A) 25 { 26 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 27 int i,j, *diag, m = a->mbs; 28 29 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 30 PLogObjectMemory(A,(m+1)*sizeof(int)); 31 for ( i=0; i<m; i++ ) { 32 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 33 if (a->j[j] == i) { 34 diag[i] = j; 35 break; 36 } 37 } 38 } 39 a->diag = diag; 40 return 0; 41 } 42 43 44 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 45 46 #undef __FUNC__ 47 #define __FUNC__ "MatGetRowIJ_SeqBAIJ" 48 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 49 PetscTruth *done) 50 { 51 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 52 int ierr, n = a->mbs,i; 53 54 *nn = n; 55 if (!ia) return 0; 56 if (symmetric) { 57 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 58 } else if (oshift == 1) { 59 /* temporarily add 1 to i and j indices */ 60 int nz = a->i[n] + 1; 61 for ( i=0; i<nz; i++ ) a->j[i]++; 62 for ( i=0; i<n+1; i++ ) a->i[i]++; 63 *ia = a->i; *ja = a->j; 64 } else { 65 *ia = a->i; *ja = a->j; 66 } 67 68 return 0; 69 } 70 71 #undef __FUNC__ 72 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ" /* ADIC Ignore */ 73 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 74 PetscTruth *done) 75 { 76 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 77 int i,n = a->mbs; 78 79 if (!ia) return 0; 80 if (symmetric) { 81 PetscFree(*ia); 82 PetscFree(*ja); 83 } else if (oshift == 1) { 84 int nz = a->i[n]; 85 for ( i=0; i<nz; i++ ) a->j[i]--; 86 for ( i=0; i<n+1; i++ ) a->i[i]--; 87 } 88 return 0; 89 } 90 91 92 #undef __FUNC__ 93 #define __FUNC__ "MatView_SeqBAIJ_Binary" /* ADIC Ignore */ 94 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 95 { 96 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 97 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 98 Scalar *aa; 99 100 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 101 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 102 col_lens[0] = MAT_COOKIE; 103 104 col_lens[1] = a->m; 105 col_lens[2] = a->n; 106 col_lens[3] = a->nz*bs2; 107 108 /* store lengths of each row and write (including header) to file */ 109 count = 0; 110 for ( i=0; i<a->mbs; i++ ) { 111 for ( j=0; j<bs; j++ ) { 112 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 113 } 114 } 115 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,BINARY_INT,1); CHKERRQ(ierr); 116 PetscFree(col_lens); 117 118 /* store column indices (zero start index) */ 119 jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) ); CHKPTRQ(jj); 120 count = 0; 121 for ( i=0; i<a->mbs; i++ ) { 122 for ( j=0; j<bs; j++ ) { 123 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 124 for ( l=0; l<bs; l++ ) { 125 jj[count++] = bs*a->j[k] + l; 126 } 127 } 128 } 129 } 130 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,BINARY_INT,0); CHKERRQ(ierr); 131 PetscFree(jj); 132 133 /* store nonzero values */ 134 aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar)); CHKPTRQ(aa); 135 count = 0; 136 for ( i=0; i<a->mbs; i++ ) { 137 for ( j=0; j<bs; j++ ) { 138 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 139 for ( l=0; l<bs; l++ ) { 140 aa[count++] = a->a[bs2*k + l*bs + j]; 141 } 142 } 143 } 144 } 145 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,BINARY_SCALAR,0); CHKERRQ(ierr); 146 PetscFree(aa); 147 return 0; 148 } 149 150 #undef __FUNC__ 151 #define __FUNC__ "MatView_SeqBAIJ_ASCII" /* ADIC Ignore */ 152 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 153 { 154 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 155 int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 156 FILE *fd; 157 char *outputname; 158 159 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 160 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 161 ierr = ViewerGetFormat(viewer,&format); 162 if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) { 163 fprintf(fd," block size is %d\n",bs); 164 } 165 else if (format == VIEWER_FORMAT_ASCII_MATLAB) { 166 SETERRQ(1,0,"Matlab format not supported"); 167 } 168 else if (format == VIEWER_FORMAT_ASCII_COMMON) { 169 for ( i=0; i<a->mbs; i++ ) { 170 for ( j=0; j<bs; j++ ) { 171 fprintf(fd,"row %d:",i*bs+j); 172 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 173 for ( l=0; l<bs; l++ ) { 174 #if defined(PETSC_COMPLEX) 175 if (imag(a->a[bs2*k + l*bs + j]) > 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 176 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 177 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 178 else if (imag(a->a[bs2*k + l*bs + j]) < 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 179 fprintf(fd," %d %g - %g i",bs*a->j[k]+l, 180 real(a->a[bs2*k + l*bs + j]),-imag(a->a[bs2*k + l*bs + j])); 181 else if (real(a->a[bs2*k + l*bs + j]) != 0.0) 182 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 183 #else 184 if (a->a[bs2*k + l*bs + j] != 0.0) 185 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 186 #endif 187 } 188 } 189 fprintf(fd,"\n"); 190 } 191 } 192 } 193 else { 194 for ( i=0; i<a->mbs; i++ ) { 195 for ( j=0; j<bs; j++ ) { 196 fprintf(fd,"row %d:",i*bs+j); 197 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 198 for ( l=0; l<bs; l++ ) { 199 #if defined(PETSC_COMPLEX) 200 if (imag(a->a[bs2*k + l*bs + j]) > 0.0) { 201 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 202 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 203 } 204 else if (imag(a->a[bs2*k + l*bs + j]) < 0.0) { 205 fprintf(fd," %d %g - %g i",bs*a->j[k]+l, 206 real(a->a[bs2*k + l*bs + j]),-imag(a->a[bs2*k + l*bs + j])); 207 } 208 else { 209 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 210 } 211 #else 212 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 213 #endif 214 } 215 } 216 fprintf(fd,"\n"); 217 } 218 } 219 } 220 fflush(fd); 221 return 0; 222 } 223 224 #undef __FUNC__ 225 #define __FUNC__ "MatView_SeqBAIJ_Draw" /* ADIC Ignore */ 226 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer) 227 { 228 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 229 int row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2; 230 double xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r; 231 Scalar *aa; 232 Draw draw; 233 DrawButton button; 234 PetscTruth isnull; 235 236 ViewerDrawGetDraw(viewer,&draw); 237 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 238 239 xr = a->n; yr = a->m; h = yr/10.0; w = xr/10.0; 240 xr += w; yr += h; xl = -w; yl = -h; 241 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 242 /* loop over matrix elements drawing boxes */ 243 color = DRAW_BLUE; 244 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 245 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 246 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 247 x_l = a->j[j]*bs; x_r = x_l + 1.0; 248 aa = a->a + j*bs2; 249 for ( k=0; k<bs; k++ ) { 250 for ( l=0; l<bs; l++ ) { 251 if (PetscReal(*aa++) >= 0.) continue; 252 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 253 } 254 } 255 } 256 } 257 color = DRAW_CYAN; 258 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 259 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 260 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 261 x_l = a->j[j]*bs; x_r = x_l + 1.0; 262 aa = a->a + j*bs2; 263 for ( k=0; k<bs; k++ ) { 264 for ( l=0; l<bs; l++ ) { 265 if (PetscReal(*aa++) != 0.) continue; 266 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 267 } 268 } 269 } 270 } 271 272 color = DRAW_RED; 273 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 274 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 275 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 276 x_l = a->j[j]*bs; x_r = x_l + 1.0; 277 aa = a->a + j*bs2; 278 for ( k=0; k<bs; k++ ) { 279 for ( l=0; l<bs; l++ ) { 280 if (PetscReal(*aa++) <= 0.) continue; 281 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 282 } 283 } 284 } 285 } 286 287 DrawFlush(draw); 288 DrawGetPause(draw,&pause); 289 if (pause >= 0) { PetscSleep(pause); return 0;} 290 291 /* allow the matrix to zoom or shrink */ 292 ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 293 while (button != BUTTON_RIGHT) { 294 DrawClear(draw); 295 if (button == BUTTON_LEFT) scale = .5; 296 else if (button == BUTTON_CENTER) scale = 2.; 297 xl = scale*(xl + w - xc) + xc - w*scale; 298 xr = scale*(xr - w - xc) + xc + w*scale; 299 yl = scale*(yl + h - yc) + yc - h*scale; 300 yr = scale*(yr - h - yc) + yc + h*scale; 301 w *= scale; h *= scale; 302 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 303 color = DRAW_BLUE; 304 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 305 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 306 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 307 x_l = a->j[j]*bs; x_r = x_l + 1.0; 308 aa = a->a + j*bs2; 309 for ( k=0; k<bs; k++ ) { 310 for ( l=0; l<bs; l++ ) { 311 if (PetscReal(*aa++) >= 0.) continue; 312 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 313 } 314 } 315 } 316 } 317 color = DRAW_CYAN; 318 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 319 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 320 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 321 x_l = a->j[j]*bs; x_r = x_l + 1.0; 322 aa = a->a + j*bs2; 323 for ( k=0; k<bs; k++ ) { 324 for ( l=0; l<bs; l++ ) { 325 if (PetscReal(*aa++) != 0.) continue; 326 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 327 } 328 } 329 } 330 } 331 332 color = DRAW_RED; 333 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 334 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 335 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 336 x_l = a->j[j]*bs; x_r = x_l + 1.0; 337 aa = a->a + j*bs2; 338 for ( k=0; k<bs; k++ ) { 339 for ( l=0; l<bs; l++ ) { 340 if (PetscReal(*aa++) <= 0.) continue; 341 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 342 } 343 } 344 } 345 } 346 ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 347 } 348 return 0; 349 } 350 351 #undef __FUNC__ 352 #define __FUNC__ "MatView_SeqBAIJ" /* ADIC Ignore */ 353 int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 354 { 355 Mat A = (Mat) obj; 356 ViewerType vtype; 357 int ierr; 358 359 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 360 if (vtype == MATLAB_VIEWER) { 361 SETERRQ(1,0,"Matlab viewer not supported"); 362 } 363 else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 364 return MatView_SeqBAIJ_ASCII(A,viewer); 365 } 366 else if (vtype == BINARY_FILE_VIEWER) { 367 return MatView_SeqBAIJ_Binary(A,viewer); 368 } 369 else if (vtype == DRAW_VIEWER) { 370 return MatView_SeqBAIJ_Draw(A,viewer); 371 } 372 return 0; 373 } 374 375 #define CHUNKSIZE 10 376 377 #undef __FUNC__ 378 #define __FUNC__ "MatSetValues_SeqBAIJ" 379 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 380 { 381 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 382 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 383 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 384 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 385 int ridx,cidx,bs2=a->bs2; 386 Scalar *ap,value,*aa=a->a,*bap; 387 388 for ( k=0; k<m; k++ ) { /* loop over added rows */ 389 row = im[k]; brow = row/bs; 390 #if defined(PETSC_BOPT_g) 391 if (row < 0) SETERRQ(1,0,"Negative row"); 392 if (row >= a->m) SETERRQ(1,0,"Row too large"); 393 #endif 394 rp = aj + ai[brow]; 395 ap = aa + bs2*ai[brow]; 396 rmax = imax[brow]; 397 nrow = ailen[brow]; 398 low = 0; 399 for ( l=0; l<n; l++ ) { /* loop over added columns */ 400 #if defined(PETSC_BOPT_g) 401 if (in[l] < 0) SETERRQ(1,0,"Negative column"); 402 if (in[l] >= a->n) SETERRQ(1,0,"Column too large"); 403 #endif 404 col = in[l]; bcol = col/bs; 405 ridx = row % bs; cidx = col % bs; 406 if (roworiented) { 407 value = *v++; 408 } 409 else { 410 value = v[k + l*m]; 411 } 412 if (!sorted) low = 0; high = nrow; 413 while (high-low > 7) { 414 t = (low+high)/2; 415 if (rp[t] > bcol) high = t; 416 else low = t; 417 } 418 for ( i=low; i<high; i++ ) { 419 if (rp[i] > bcol) break; 420 if (rp[i] == bcol) { 421 bap = ap + bs2*i + bs*cidx + ridx; 422 if (is == ADD_VALUES) *bap += value; 423 else *bap = value; 424 goto noinsert1; 425 } 426 } 427 if (nonew == 1) goto noinsert1; 428 else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 429 if (nrow >= rmax) { 430 /* there is no extra room in row, therefore enlarge */ 431 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 432 Scalar *new_a; 433 434 if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 435 436 /* Malloc new storage space */ 437 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 438 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 439 new_j = (int *) (new_a + bs2*new_nz); 440 new_i = new_j + new_nz; 441 442 /* copy over old data into new slots */ 443 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 444 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 445 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); 446 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 447 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, 448 len*sizeof(int)); 449 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); 450 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 451 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), 452 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); 453 /* free up old matrix storage */ 454 PetscFree(a->a); 455 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 456 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 457 a->singlemalloc = 1; 458 459 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 460 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 461 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 462 a->maxnz += bs2*CHUNKSIZE; 463 a->reallocs++; 464 a->nz++; 465 } 466 N = nrow++ - 1; 467 /* shift up all the later entries in this row */ 468 for ( ii=N; ii>=i; ii-- ) { 469 rp[ii+1] = rp[ii]; 470 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 471 } 472 if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 473 rp[i] = bcol; 474 ap[bs2*i + bs*cidx + ridx] = value; 475 noinsert1:; 476 low = i; 477 } 478 ailen[brow] = nrow; 479 } 480 return 0; 481 } 482 483 #undef __FUNC__ 484 #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ" 485 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 486 { 487 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 488 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 489 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 490 int roworiented=a->roworiented; 491 int *aj=a->j,nonew=a->nonew; 492 int bs2=a->bs2,bs=a->bs,stepval; 493 Scalar *ap,*value=v,*aa=a->a,*bap; 494 495 if (roworiented) { 496 stepval = (n-1)*bs; 497 } else { 498 stepval = (m-1)*bs; 499 } 500 for ( k=0; k<m; k++ ) { /* loop over added rows */ 501 row = im[k]; 502 #if defined(PETSC_BOPT_g) 503 if (row < 0) SETERRQ(1,0,"Negative row"); 504 if (row >= a->mbs) SETERRQ(1,0,"Row too large"); 505 #endif 506 rp = aj + ai[row]; 507 ap = aa + bs2*ai[row]; 508 rmax = imax[row]; 509 nrow = ailen[row]; 510 low = 0; 511 for ( l=0; l<n; l++ ) { /* loop over added columns */ 512 #if defined(PETSC_BOPT_g) 513 if (in[l] < 0) SETERRQ(1,0,"Negative column"); 514 if (in[l] >= a->nbs) SETERRQ(1,0,"Column too large"); 515 #endif 516 col = in[l]; 517 if (roworiented) { 518 value = v + k*(stepval+bs)*bs + l*bs; 519 } else { 520 value = v + l*(stepval+bs)*bs + k*bs; 521 } 522 if (!sorted) low = 0; high = nrow; 523 while (high-low > 7) { 524 t = (low+high)/2; 525 if (rp[t] > col) high = t; 526 else low = t; 527 } 528 for ( i=low; i<high; i++ ) { 529 if (rp[i] > col) break; 530 if (rp[i] == col) { 531 bap = ap + bs2*i; 532 if (roworiented) { 533 if (is == ADD_VALUES) { 534 for ( ii=0; ii<bs; ii++,value+=stepval ) 535 for (jj=ii; jj<bs2; jj+=bs ) 536 bap[jj] += *value++; 537 } else { 538 for ( ii=0; ii<bs; ii++,value+=stepval ) 539 for (jj=ii; jj<bs2; jj+=bs ) 540 bap[jj] = *value++; 541 } 542 } else { 543 if (is == ADD_VALUES) { 544 for ( ii=0; ii<bs; ii++,value+=stepval ) 545 for (jj=0; jj<bs; jj++ ) 546 *bap++ += *value++; 547 } else { 548 for ( ii=0; ii<bs; ii++,value+=stepval ) 549 for (jj=0; jj<bs; jj++ ) 550 *bap++ = *value++; 551 } 552 } 553 goto noinsert2; 554 } 555 } 556 if (nonew == 1) goto noinsert2; 557 else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 558 if (nrow >= rmax) { 559 /* there is no extra room in row, therefore enlarge */ 560 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 561 Scalar *new_a; 562 563 if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 564 565 /* malloc new storage space */ 566 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 567 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 568 new_j = (int *) (new_a + bs2*new_nz); 569 new_i = new_j + new_nz; 570 571 /* copy over old data into new slots */ 572 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} 573 for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 574 PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int)); 575 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 576 PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow, 577 len*sizeof(int)); 578 PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(Scalar)); 579 PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 580 PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE), 581 aa+bs2*(ai[row]+nrow),bs2*len*sizeof(Scalar)); 582 /* free up old matrix storage */ 583 PetscFree(a->a); 584 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 585 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 586 a->singlemalloc = 1; 587 588 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 589 rmax = imax[row] = imax[row] + CHUNKSIZE; 590 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 591 a->maxnz += bs2*CHUNKSIZE; 592 a->reallocs++; 593 a->nz++; 594 } 595 N = nrow++ - 1; 596 /* shift up all the later entries in this row */ 597 for ( ii=N; ii>=i; ii-- ) { 598 rp[ii+1] = rp[ii]; 599 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 600 } 601 if (N >= i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 602 rp[i] = col; 603 bap = ap + bs2*i; 604 if (roworiented) { 605 for ( ii=0; ii<bs; ii++,value+=stepval) 606 for (jj=ii; jj<bs2; jj+=bs ) 607 bap[jj] = *value++; 608 } else { 609 for ( ii=0; ii<bs; ii++,value+=stepval ) 610 for (jj=0; jj<bs; jj++ ) 611 *bap++ = *value++; 612 } 613 noinsert2:; 614 low = i; 615 } 616 ailen[row] = nrow; 617 } 618 return 0; 619 } 620 621 #undef __FUNC__ 622 #define __FUNC__ "MatGetSize_SeqBAIJ" /* ADIC Ignore */ 623 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 624 { 625 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 626 *m = a->m; *n = a->n; 627 return 0; 628 } 629 630 #undef __FUNC__ 631 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ" /* ADIC Ignore */ 632 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 633 { 634 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 635 *m = 0; *n = a->m; 636 return 0; 637 } 638 639 #undef __FUNC__ 640 #define __FUNC__ "MatGetRow_SeqBAIJ" 641 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 642 { 643 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 644 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 645 Scalar *aa,*v_i,*aa_i; 646 647 bs = a->bs; 648 ai = a->i; 649 aj = a->j; 650 aa = a->a; 651 bs2 = a->bs2; 652 653 if (row < 0 || row >= a->m) SETERRQ(1,0,"Row out of range"); 654 655 bn = row/bs; /* Block number */ 656 bp = row % bs; /* Block Position */ 657 M = ai[bn+1] - ai[bn]; 658 *nz = bs*M; 659 660 if (v) { 661 *v = 0; 662 if (*nz) { 663 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 664 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 665 v_i = *v + i*bs; 666 aa_i = aa + bs2*(ai[bn] + i); 667 for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 668 } 669 } 670 } 671 672 if (idx) { 673 *idx = 0; 674 if (*nz) { 675 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 676 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 677 idx_i = *idx + i*bs; 678 itmp = bs*aj[ai[bn] + i]; 679 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 680 } 681 } 682 } 683 return 0; 684 } 685 686 #undef __FUNC__ 687 #define __FUNC__ "MatRestoreRow_SeqBAIJ" /* ADIC Ignore */ 688 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 689 { 690 if (idx) {if (*idx) PetscFree(*idx);} 691 if (v) {if (*v) PetscFree(*v);} 692 return 0; 693 } 694 695 #undef __FUNC__ 696 #define __FUNC__ "MatTranspose_SeqBAIJ" 697 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 698 { 699 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 700 Mat C; 701 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 702 int *rows,*cols,bs2=a->bs2; 703 Scalar *array=a->a; 704 705 if (B==PETSC_NULL && mbs!=nbs) 706 SETERRQ(1,0,"Square matrix only for in-place"); 707 col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 708 PetscMemzero(col,(1+nbs)*sizeof(int)); 709 710 for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 711 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 712 PetscFree(col); 713 rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 714 cols = rows + bs; 715 for ( i=0; i<mbs; i++ ) { 716 cols[0] = i*bs; 717 for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 718 len = ai[i+1] - ai[i]; 719 for ( j=0; j<len; j++ ) { 720 rows[0] = (*aj++)*bs; 721 for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 722 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 723 array += bs2; 724 } 725 } 726 PetscFree(rows); 727 728 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 729 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 730 731 if (B != PETSC_NULL) { 732 *B = C; 733 } else { 734 /* This isn't really an in-place transpose */ 735 PetscFree(a->a); 736 if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 737 if (a->diag) PetscFree(a->diag); 738 if (a->ilen) PetscFree(a->ilen); 739 if (a->imax) PetscFree(a->imax); 740 if (a->solve_work) PetscFree(a->solve_work); 741 PetscFree(a); 742 PetscMemcpy(A,C,sizeof(struct _p_Mat)); 743 PetscHeaderDestroy(C); 744 } 745 return 0; 746 } 747 748 749 #undef __FUNC__ 750 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ" 751 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 752 { 753 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 754 int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 755 int m = a->m,*ip, N, *ailen = a->ilen; 756 int mbs = a->mbs, bs2 = a->bs2,rmax; 757 Scalar *aa = a->a, *ap; 758 759 if (mode == MAT_FLUSH_ASSEMBLY) return 0; 760 761 rmax = ailen[0]; 762 for ( i=1; i<mbs; i++ ) { 763 /* move each row back by the amount of empty slots (fshift) before it*/ 764 fshift += imax[i-1] - ailen[i-1]; 765 rmax = PetscMax(rmax,ailen[i]); 766 if (fshift) { 767 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 768 N = ailen[i]; 769 for ( j=0; j<N; j++ ) { 770 ip[j-fshift] = ip[j]; 771 PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 772 } 773 } 774 ai[i] = ai[i-1] + ailen[i-1]; 775 } 776 if (mbs) { 777 fshift += imax[mbs-1] - ailen[mbs-1]; 778 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 779 } 780 /* reset ilen and imax for each row */ 781 for ( i=0; i<mbs; i++ ) { 782 ailen[i] = imax[i] = ai[i+1] - ai[i]; 783 } 784 a->nz = ai[mbs]; 785 786 /* diagonals may have moved, so kill the diagonal pointers */ 787 if (fshift && a->diag) { 788 PetscFree(a->diag); 789 PLogObjectMemory(A,-(m+1)*sizeof(int)); 790 a->diag = 0; 791 } 792 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 793 m,a->n,a->bs,fshift*bs2,a->nz*bs2); 794 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 795 a->reallocs); 796 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 797 a->reallocs = 0; 798 A->info.nz_unneeded = (double)fshift*bs2; 799 800 return 0; 801 } 802 803 #undef __FUNC__ 804 #define __FUNC__ "MatZeroEntries_SeqBAIJ" 805 int MatZeroEntries_SeqBAIJ(Mat A) 806 { 807 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 808 PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar)); 809 return 0; 810 } 811 812 #undef __FUNC__ 813 #define __FUNC__ "MatDestroy_SeqBAIJ" /* ADIC Ignore */ 814 int MatDestroy_SeqBAIJ(PetscObject obj) 815 { 816 Mat A = (Mat) obj; 817 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 818 819 #if defined(PETSC_LOG) 820 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 821 #endif 822 PetscFree(a->a); 823 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 824 if (a->diag) PetscFree(a->diag); 825 if (a->ilen) PetscFree(a->ilen); 826 if (a->imax) PetscFree(a->imax); 827 if (a->solve_work) PetscFree(a->solve_work); 828 if (a->mult_work) PetscFree(a->mult_work); 829 PetscFree(a); 830 PLogObjectDestroy(A); 831 PetscHeaderDestroy(A); 832 return 0; 833 } 834 835 #undef __FUNC__ 836 #define __FUNC__ "MatSetOption_SeqBAIJ" /* ADIC Ignore */ 837 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 838 { 839 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 840 if (op == MAT_ROW_ORIENTED) a->roworiented = 1; 841 else if (op == MAT_COLUMN_ORIENTED) a->roworiented = 0; 842 else if (op == MAT_COLUMNS_SORTED) a->sorted = 1; 843 else if (op == MAT_COLUMNS_UNSORTED) a->sorted = 0; 844 else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 845 else if (op == MAT_NEW_NONZERO_LOCATION_ERROR) a->nonew = -1; 846 else if (op == MAT_NEW_NONZERO_ALLOCATION_ERROR) a->nonew = -2; 847 else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 848 else if (op == MAT_ROWS_SORTED || 849 op == MAT_ROWS_UNSORTED || 850 op == MAT_SYMMETRIC || 851 op == MAT_STRUCTURALLY_SYMMETRIC || 852 op == MAT_YES_NEW_DIAGONALS || 853 op == MAT_IGNORE_OFF_PROC_ENTRIES) 854 PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 855 else if (op == MAT_NO_NEW_DIAGONALS) 856 {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");} 857 else 858 {SETERRQ(PETSC_ERR_SUP,0,"unknown option");} 859 return 0; 860 } 861 862 863 /* -------------------------------------------------------*/ 864 /* Should check that shapes of vectors and matrices match */ 865 /* -------------------------------------------------------*/ 866 #include "pinclude/plapack.h" 867 868 #undef __FUNC__ 869 #define __FUNC__ "MatMult_SeqBAIJ_1" 870 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz) 871 { 872 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 873 register Scalar *x,*z,*v,sum; 874 int mbs=a->mbs,i,*idx,*ii,n; 875 876 VecGetArray_Fast(xx,x); 877 VecGetArray_Fast(zz,z); 878 879 idx = a->j; 880 v = a->a; 881 ii = a->i; 882 883 for ( i=0; i<mbs; i++ ) { 884 n = ii[1] - ii[0]; ii++; 885 sum = 0.0; 886 while (n--) sum += *v++ * x[*idx++]; 887 z[i] = sum; 888 } 889 VecRestoreArray_Fast(xx,x); 890 VecRestoreArray_Fast(zz,z); 891 PLogFlops(2*a->nz - a->m); 892 return 0; 893 } 894 895 #undef __FUNC__ 896 #define __FUNC__ "MatMult_SeqBAIJ_2" 897 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz) 898 { 899 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 900 register Scalar *x,*z,*v,*xb,sum1,sum2; 901 register Scalar x1,x2; 902 int mbs=a->mbs,i,*idx,*ii; 903 int j,n; 904 905 VecGetArray_Fast(xx,x); 906 VecGetArray_Fast(zz,z); 907 908 idx = a->j; 909 v = a->a; 910 ii = a->i; 911 912 for ( i=0; i<mbs; i++ ) { 913 n = ii[1] - ii[0]; ii++; 914 sum1 = 0.0; sum2 = 0.0; 915 for ( j=0; j<n; j++ ) { 916 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 917 sum1 += v[0]*x1 + v[2]*x2; 918 sum2 += v[1]*x1 + v[3]*x2; 919 v += 4; 920 } 921 z[0] = sum1; z[1] = sum2; 922 z += 2; 923 } 924 VecRestoreArray_Fast(xx,x); 925 VecRestoreArray_Fast(zz,z); 926 PLogFlops(4*a->nz - a->m); 927 return 0; 928 } 929 930 #undef __FUNC__ 931 #define __FUNC__ "MatMult_SeqBAIJ_3" 932 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz) 933 { 934 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 935 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 936 int mbs=a->mbs,i,*idx,*ii,j,n; 937 938 VecGetArray_Fast(xx,x); 939 VecGetArray_Fast(zz,z); 940 941 idx = a->j; 942 v = a->a; 943 ii = a->i; 944 945 for ( i=0; i<mbs; i++ ) { 946 n = ii[1] - ii[0]; ii++; 947 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 948 for ( j=0; j<n; j++ ) { 949 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 950 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 951 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 952 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 953 v += 9; 954 } 955 z[0] = sum1; z[1] = sum2; z[2] = sum3; 956 z += 3; 957 } 958 VecRestoreArray_Fast(xx,x); 959 VecRestoreArray_Fast(zz,z); 960 PLogFlops(18*a->nz - a->m); 961 return 0; 962 } 963 964 #undef __FUNC__ 965 #define __FUNC__ "MatMult_SeqBAIJ_4" 966 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz) 967 { 968 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 969 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4; 970 register Scalar x1,x2,x3,x4; 971 int mbs=a->mbs,i,*idx,*ii; 972 int j,n; 973 974 VecGetArray_Fast(xx,x); 975 VecGetArray_Fast(zz,z); 976 977 idx = a->j; 978 v = a->a; 979 ii = a->i; 980 981 for ( i=0; i<mbs; i++ ) { 982 n = ii[1] - ii[0]; ii++; 983 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 984 for ( j=0; j<n; j++ ) { 985 xb = x + 4*(*idx++); 986 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 987 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 988 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 989 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 990 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 991 v += 16; 992 } 993 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 994 z += 4; 995 } 996 VecRestoreArray_Fast(xx,x); 997 VecRestoreArray_Fast(zz,z); 998 PLogFlops(32*a->nz - a->m); 999 return 0; 1000 } 1001 1002 #undef __FUNC__ 1003 #define __FUNC__ "MatMult_SeqBAIJ_5" 1004 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz) 1005 { 1006 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1007 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 1008 register Scalar x1,x2,x3,x4,x5; 1009 int mbs=a->mbs,i,*idx,*ii,j,n; 1010 1011 VecGetArray_Fast(xx,x); 1012 VecGetArray_Fast(zz,z); 1013 1014 idx = a->j; 1015 v = a->a; 1016 ii = a->i; 1017 1018 for ( i=0; i<mbs; i++ ) { 1019 n = ii[1] - ii[0]; ii++; 1020 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 1021 for ( j=0; j<n; j++ ) { 1022 xb = x + 5*(*idx++); 1023 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 1024 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1025 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1026 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1027 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1028 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 1029 v += 25; 1030 } 1031 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 1032 z += 5; 1033 } 1034 VecRestoreArray_Fast(xx,x); 1035 VecRestoreArray_Fast(zz,z); 1036 PLogFlops(50*a->nz - a->m); 1037 return 0; 1038 } 1039 1040 #undef __FUNC__ 1041 #define __FUNC__ "MatMult_SeqBAIJ_7" 1042 static int MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz) 1043 { 1044 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1045 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 1046 register Scalar x1,x2,x3,x4,x5,x6,x7; 1047 int mbs=a->mbs,i,*idx,*ii,j,n; 1048 1049 VecGetArray_Fast(xx,x); 1050 VecGetArray_Fast(zz,z); 1051 1052 idx = a->j; 1053 v = a->a; 1054 ii = a->i; 1055 1056 for ( i=0; i<mbs; i++ ) { 1057 n = ii[1] - ii[0]; ii++; 1058 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0; 1059 for ( j=0; j<n; j++ ) { 1060 xb = x + 7*(*idx++); 1061 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 1062 sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 1063 sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 1064 sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 1065 sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 1066 sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 1067 sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 1068 sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 1069 v += 49; 1070 } 1071 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 1072 z += 7; 1073 } 1074 1075 VecRestoreArray_Fast(xx,x); 1076 VecRestoreArray_Fast(zz,z); 1077 PLogFlops(98*a->nz - a->m); 1078 return 0; 1079 } 1080 1081 #undef __FUNC__ 1082 #define __FUNC__ "MatMult_SeqBAIJ_N" 1083 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz) 1084 { 1085 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1086 register Scalar *x,*z,*v,*xb; 1087 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2; 1088 int _One = 1,ncols,k; 1089 Scalar _DOne = 1.0,*work,*workt,_DZero = 0.0; 1090 1091 VecGetArray_Fast(xx,x); 1092 VecGetArray_Fast(zz,z); 1093 1094 idx = a->j; 1095 v = a->a; 1096 ii = a->i; 1097 1098 1099 if (!a->mult_work) { 1100 k = PetscMax(a->m,a->n); 1101 a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work); 1102 } 1103 work = a->mult_work; 1104 for ( i=0; i<mbs; i++ ) { 1105 n = ii[1] - ii[0]; ii++; 1106 ncols = n*bs; 1107 workt = work; 1108 for ( j=0; j<n; j++ ) { 1109 xb = x + bs*(*idx++); 1110 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1111 workt += bs; 1112 } 1113 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); 1114 v += n*bs2; 1115 z += bs; 1116 } 1117 VecRestoreArray_Fast(xx,x); 1118 VecRestoreArray_Fast(zz,z); 1119 PLogFlops(2*a->nz*bs2 - a->m); 1120 return 0; 1121 } 1122 1123 #undef __FUNC__ 1124 #define __FUNC__ "MatMultAdd_SeqBAIJ_1" 1125 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 1126 { 1127 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1128 register Scalar *x,*y,*z,*v,sum; 1129 int mbs=a->mbs,i,*idx,*ii,n; 1130 1131 VecGetArray_Fast(xx,x); 1132 VecGetArray_Fast(yy,y); 1133 VecGetArray_Fast(zz,z); 1134 1135 idx = a->j; 1136 v = a->a; 1137 ii = a->i; 1138 1139 for ( i=0; i<mbs; i++ ) { 1140 n = ii[1] - ii[0]; ii++; 1141 sum = y[i]; 1142 while (n--) sum += *v++ * x[*idx++]; 1143 z[i] = sum; 1144 } 1145 VecRestoreArray_Fast(xx,x); 1146 VecRestoreArray_Fast(yy,y); 1147 VecRestoreArray_Fast(zz,z); 1148 PLogFlops(2*a->nz); 1149 return 0; 1150 } 1151 1152 #undef __FUNC__ 1153 #define __FUNC__ "MatMultAdd_SeqBAIJ_2" 1154 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 1155 { 1156 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1157 register Scalar *x,*y,*z,*v,*xb,sum1,sum2; 1158 register Scalar x1,x2; 1159 int mbs=a->mbs,i,*idx,*ii; 1160 int j,n; 1161 1162 VecGetArray_Fast(xx,x); 1163 VecGetArray_Fast(yy,y); 1164 VecGetArray_Fast(zz,z); 1165 1166 idx = a->j; 1167 v = a->a; 1168 ii = a->i; 1169 1170 for ( i=0; i<mbs; i++ ) { 1171 n = ii[1] - ii[0]; ii++; 1172 sum1 = y[0]; sum2 = y[1]; 1173 for ( j=0; j<n; j++ ) { 1174 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 1175 sum1 += v[0]*x1 + v[2]*x2; 1176 sum2 += v[1]*x1 + v[3]*x2; 1177 v += 4; 1178 } 1179 z[0] = sum1; z[1] = sum2; 1180 z += 2; y += 2; 1181 } 1182 VecRestoreArray_Fast(xx,x); 1183 VecRestoreArray_Fast(yy,y); 1184 VecRestoreArray_Fast(zz,z); 1185 PLogFlops(4*a->nz); 1186 return 0; 1187 } 1188 1189 #undef __FUNC__ 1190 #define __FUNC__ "MatMultAdd_SeqBAIJ_3" 1191 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 1192 { 1193 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1194 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 1195 int mbs=a->mbs,i,*idx,*ii,j,n; 1196 1197 VecGetArray_Fast(xx,x); 1198 VecGetArray_Fast(yy,y); 1199 VecGetArray_Fast(zz,z); 1200 1201 idx = a->j; 1202 v = a->a; 1203 ii = a->i; 1204 1205 for ( i=0; i<mbs; i++ ) { 1206 n = ii[1] - ii[0]; ii++; 1207 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; 1208 for ( j=0; j<n; j++ ) { 1209 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1210 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 1211 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 1212 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 1213 v += 9; 1214 } 1215 z[0] = sum1; z[1] = sum2; z[2] = sum3; 1216 z += 3; y += 3; 1217 } 1218 VecRestoreArray_Fast(xx,x); 1219 VecRestoreArray_Fast(yy,y); 1220 VecRestoreArray_Fast(zz,z); 1221 PLogFlops(18*a->nz); 1222 return 0; 1223 } 1224 1225 #undef __FUNC__ 1226 #define __FUNC__ "MatMultAdd_SeqBAIJ_4" 1227 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 1228 { 1229 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1230 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4; 1231 register Scalar x1,x2,x3,x4; 1232 int mbs=a->mbs,i,*idx,*ii; 1233 int j,n; 1234 1235 VecGetArray_Fast(xx,x); 1236 VecGetArray_Fast(yy,y); 1237 VecGetArray_Fast(zz,z); 1238 1239 idx = a->j; 1240 v = a->a; 1241 ii = a->i; 1242 1243 for ( i=0; i<mbs; i++ ) { 1244 n = ii[1] - ii[0]; ii++; 1245 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; 1246 for ( j=0; j<n; j++ ) { 1247 xb = x + 4*(*idx++); 1248 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1249 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 1250 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 1251 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 1252 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 1253 v += 16; 1254 } 1255 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 1256 z += 4; y += 4; 1257 } 1258 VecRestoreArray_Fast(xx,x); 1259 VecRestoreArray_Fast(yy,y); 1260 VecRestoreArray_Fast(zz,z); 1261 PLogFlops(32*a->nz); 1262 return 0; 1263 } 1264 1265 #undef __FUNC__ 1266 #define __FUNC__ "MatMultAdd_SeqBAIJ_5" 1267 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 1268 { 1269 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1270 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 1271 register Scalar x1,x2,x3,x4,x5; 1272 int mbs=a->mbs,i,*idx,*ii,j,n; 1273 1274 VecGetArray_Fast(xx,x); 1275 VecGetArray_Fast(yy,y); 1276 VecGetArray_Fast(zz,z); 1277 1278 idx = a->j; 1279 v = a->a; 1280 ii = a->i; 1281 1282 for ( i=0; i<mbs; i++ ) { 1283 n = ii[1] - ii[0]; ii++; 1284 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; 1285 for ( j=0; j<n; j++ ) { 1286 xb = x + 5*(*idx++); 1287 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 1288 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1289 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1290 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1291 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1292 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 1293 v += 25; 1294 } 1295 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 1296 z += 5; y += 5; 1297 } 1298 VecRestoreArray_Fast(xx,x); 1299 VecRestoreArray_Fast(yy,y); 1300 VecRestoreArray_Fast(zz,z); 1301 PLogFlops(50*a->nz); 1302 return 0; 1303 } 1304 1305 #undef __FUNC__ 1306 #define __FUNC__ "MatMultAdd_SeqBAIJ_7" 1307 static int MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1308 { 1309 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1310 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 1311 register Scalar x1,x2,x3,x4,x5,x6,x7; 1312 int mbs=a->mbs,i,*idx,*ii,j,n; 1313 1314 VecGetArray_Fast(xx,x); 1315 VecGetArray_Fast(yy,y); 1316 VecGetArray_Fast(zz,z); 1317 1318 idx = a->j; 1319 v = a->a; 1320 ii = a->i; 1321 1322 for ( i=0; i<mbs; i++ ) { 1323 n = ii[1] - ii[0]; ii++; 1324 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6]; 1325 for ( j=0; j<n; j++ ) { 1326 xb = x + 7*(*idx++); 1327 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 1328 sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 1329 sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 1330 sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 1331 sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 1332 sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 1333 sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 1334 sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 1335 v += 49; 1336 } 1337 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 1338 z += 7; y += 7; 1339 } 1340 VecRestoreArray_Fast(xx,x); 1341 VecRestoreArray_Fast(yy,y); 1342 VecRestoreArray_Fast(zz,z); 1343 PLogFlops(98*a->nz); 1344 return 0; 1345 } 1346 1347 1348 #undef __FUNC__ 1349 #define __FUNC__ "MatMultAdd_SeqBAIJ_N" 1350 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 1351 { 1352 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1353 register Scalar *x,*z,*v,*xb; 1354 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr; 1355 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1356 1357 if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1358 1359 VecGetArray_Fast(xx,x); 1360 VecGetArray_Fast(zz,z); 1361 1362 idx = a->j; 1363 v = a->a; 1364 ii = a->i; 1365 1366 1367 if (!a->mult_work) { 1368 k = PetscMax(a->m,a->n); 1369 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work); 1370 } 1371 work = a->mult_work; 1372 for ( i=0; i<mbs; i++ ) { 1373 n = ii[1] - ii[0]; ii++; 1374 ncols = n*bs; 1375 workt = work; 1376 for ( j=0; j<n; j++ ) { 1377 xb = x + bs*(*idx++); 1378 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1379 workt += bs; 1380 } 1381 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); 1382 v += n*bs2; 1383 z += bs; 1384 } 1385 VecRestoreArray_Fast(xx,x); 1386 VecRestoreArray_Fast(zz,z); 1387 PLogFlops(2*a->nz*bs2 ); 1388 return 0; 1389 } 1390 1391 #undef __FUNC__ 1392 #define __FUNC__ "MatMultTrans_SeqBAIJ" 1393 int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz) 1394 { 1395 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1396 Scalar *xg,*zg,*zb; 1397 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1398 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1399 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1400 1401 1402 VecGetArray_Fast(xx,xg); x = xg; 1403 VecGetArray_Fast(zz,zg); z = zg; 1404 PetscMemzero(z,N*sizeof(Scalar)); 1405 1406 idx = a->j; 1407 v = a->a; 1408 ii = a->i; 1409 1410 switch (bs) { 1411 case 1: 1412 for ( i=0; i<mbs; i++ ) { 1413 n = ii[1] - ii[0]; ii++; 1414 xb = x + i; x1 = xb[0]; 1415 ib = idx + ai[i]; 1416 for ( j=0; j<n; j++ ) { 1417 rval = ib[j]; 1418 z[rval] += *v++ * x1; 1419 } 1420 } 1421 break; 1422 case 2: 1423 for ( i=0; i<mbs; i++ ) { 1424 n = ii[1] - ii[0]; ii++; 1425 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1426 ib = idx + ai[i]; 1427 for ( j=0; j<n; j++ ) { 1428 rval = ib[j]*2; 1429 z[rval++] += v[0]*x1 + v[1]*x2; 1430 z[rval++] += v[2]*x1 + v[3]*x2; 1431 v += 4; 1432 } 1433 } 1434 break; 1435 case 3: 1436 for ( i=0; i<mbs; i++ ) { 1437 n = ii[1] - ii[0]; ii++; 1438 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1439 ib = idx + ai[i]; 1440 for ( j=0; j<n; j++ ) { 1441 rval = ib[j]*3; 1442 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1443 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1444 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1445 v += 9; 1446 } 1447 } 1448 break; 1449 case 4: 1450 for ( i=0; i<mbs; i++ ) { 1451 n = ii[1] - ii[0]; ii++; 1452 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1453 ib = idx + ai[i]; 1454 for ( j=0; j<n; j++ ) { 1455 rval = ib[j]*4; 1456 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1457 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1458 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1459 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1460 v += 16; 1461 } 1462 } 1463 break; 1464 case 5: 1465 for ( i=0; i<mbs; i++ ) { 1466 n = ii[1] - ii[0]; ii++; 1467 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1468 x4 = xb[3]; x5 = xb[4]; 1469 ib = idx + ai[i]; 1470 for ( j=0; j<n; j++ ) { 1471 rval = ib[j]*5; 1472 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1473 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1474 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1475 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1476 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1477 v += 25; 1478 } 1479 } 1480 break; 1481 /* block sizes larger then 5 by 5 are handled by BLAS */ 1482 default: { 1483 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1484 if (!a->mult_work) { 1485 k = PetscMax(a->m,a->n); 1486 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1487 CHKPTRQ(a->mult_work); 1488 } 1489 work = a->mult_work; 1490 for ( i=0; i<mbs; i++ ) { 1491 n = ii[1] - ii[0]; ii++; 1492 ncols = n*bs; 1493 PetscMemzero(work,ncols*sizeof(Scalar)); 1494 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1495 v += n*bs2; 1496 x += bs; 1497 workt = work; 1498 for ( j=0; j<n; j++ ) { 1499 zb = z + bs*(*idx++); 1500 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1501 workt += bs; 1502 } 1503 } 1504 } 1505 } 1506 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1507 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1508 PLogFlops(2*a->nz*a->bs2 - a->n); 1509 return 0; 1510 } 1511 1512 #undef __FUNC__ 1513 #define __FUNC__ "MatMultTransAdd_SeqBAIJ" 1514 int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1515 { 1516 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1517 Scalar *xg,*zg,*zb; 1518 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1519 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1520 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1521 1522 1523 1524 VecGetArray_Fast(xx,xg); x = xg; 1525 VecGetArray_Fast(zz,zg); z = zg; 1526 1527 if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1528 else PetscMemzero(z,N*sizeof(Scalar)); 1529 1530 1531 idx = a->j; 1532 v = a->a; 1533 ii = a->i; 1534 1535 switch (bs) { 1536 case 1: 1537 for ( i=0; i<mbs; i++ ) { 1538 n = ii[1] - ii[0]; ii++; 1539 xb = x + i; x1 = xb[0]; 1540 ib = idx + ai[i]; 1541 for ( j=0; j<n; j++ ) { 1542 rval = ib[j]; 1543 z[rval] += *v++ * x1; 1544 } 1545 } 1546 break; 1547 case 2: 1548 for ( i=0; i<mbs; i++ ) { 1549 n = ii[1] - ii[0]; ii++; 1550 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1551 ib = idx + ai[i]; 1552 for ( j=0; j<n; j++ ) { 1553 rval = ib[j]*2; 1554 z[rval++] += v[0]*x1 + v[1]*x2; 1555 z[rval++] += v[2]*x1 + v[3]*x2; 1556 v += 4; 1557 } 1558 } 1559 break; 1560 case 3: 1561 for ( i=0; i<mbs; i++ ) { 1562 n = ii[1] - ii[0]; ii++; 1563 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1564 ib = idx + ai[i]; 1565 for ( j=0; j<n; j++ ) { 1566 rval = ib[j]*3; 1567 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1568 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1569 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1570 v += 9; 1571 } 1572 } 1573 break; 1574 case 4: 1575 for ( i=0; i<mbs; i++ ) { 1576 n = ii[1] - ii[0]; ii++; 1577 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1578 ib = idx + ai[i]; 1579 for ( j=0; j<n; j++ ) { 1580 rval = ib[j]*4; 1581 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1582 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1583 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1584 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1585 v += 16; 1586 } 1587 } 1588 break; 1589 case 5: 1590 for ( i=0; i<mbs; i++ ) { 1591 n = ii[1] - ii[0]; ii++; 1592 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1593 x4 = xb[3]; x5 = xb[4]; 1594 ib = idx + ai[i]; 1595 for ( j=0; j<n; j++ ) { 1596 rval = ib[j]*5; 1597 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1598 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1599 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1600 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1601 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1602 v += 25; 1603 } 1604 } 1605 break; 1606 /* block sizes larger then 5 by 5 are handled by BLAS */ 1607 default: { 1608 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1609 if (!a->mult_work) { 1610 k = PetscMax(a->m,a->n); 1611 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1612 CHKPTRQ(a->mult_work); 1613 } 1614 work = a->mult_work; 1615 for ( i=0; i<mbs; i++ ) { 1616 n = ii[1] - ii[0]; ii++; 1617 ncols = n*bs; 1618 PetscMemzero(work,ncols*sizeof(Scalar)); 1619 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1620 v += n*bs2; 1621 x += bs; 1622 workt = work; 1623 for ( j=0; j<n; j++ ) { 1624 zb = z + bs*(*idx++); 1625 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1626 workt += bs; 1627 } 1628 } 1629 } 1630 } 1631 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1632 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1633 PLogFlops(2*a->nz*a->bs2); 1634 return 0; 1635 } 1636 1637 #undef __FUNC__ 1638 #define __FUNC__ "MatGetInfo_SeqBAIJ" /* ADIC Ignore */ 1639 int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info) 1640 { 1641 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1642 1643 info->rows_global = (double)a->m; 1644 info->columns_global = (double)a->n; 1645 info->rows_local = (double)a->m; 1646 info->columns_local = (double)a->n; 1647 info->block_size = a->bs2; 1648 info->nz_allocated = a->maxnz; 1649 info->nz_used = a->bs2*a->nz; 1650 info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 1651 /* if (info->nz_unneeded != A->info.nz_unneeded) 1652 printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */ 1653 info->assemblies = A->num_ass; 1654 info->mallocs = a->reallocs; 1655 info->memory = A->mem; 1656 if (A->factor) { 1657 info->fill_ratio_given = A->info.fill_ratio_given; 1658 info->fill_ratio_needed = A->info.fill_ratio_needed; 1659 info->factor_mallocs = A->info.factor_mallocs; 1660 } else { 1661 info->fill_ratio_given = 0; 1662 info->fill_ratio_needed = 0; 1663 info->factor_mallocs = 0; 1664 } 1665 return 0; 1666 } 1667 1668 #undef __FUNC__ 1669 #define __FUNC__ "MatEqual_SeqBAIJ" 1670 int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 1671 { 1672 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 1673 1674 if (B->type !=MATSEQBAIJ)SETERRQ(1,0,"Matrices must be same type"); 1675 1676 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1677 if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 1678 (a->nz != b->nz)) { 1679 *flg = PETSC_FALSE; return 0; 1680 } 1681 1682 /* if the a->i are the same */ 1683 if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 1684 *flg = PETSC_FALSE; return 0; 1685 } 1686 1687 /* if a->j are the same */ 1688 if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 1689 *flg = PETSC_FALSE; return 0; 1690 } 1691 1692 /* if a->a are the same */ 1693 if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 1694 *flg = PETSC_FALSE; return 0; 1695 } 1696 *flg = PETSC_TRUE; 1697 return 0; 1698 1699 } 1700 1701 #undef __FUNC__ 1702 #define __FUNC__ "MatGetDiagonal_SeqBAIJ" 1703 int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 1704 { 1705 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1706 int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 1707 Scalar *x, zero = 0.0,*aa,*aa_j; 1708 1709 if (A->factor) SETERRQ(1,0,"Not for factored matrix"); 1710 bs = a->bs; 1711 aa = a->a; 1712 ai = a->i; 1713 aj = a->j; 1714 ambs = a->mbs; 1715 bs2 = a->bs2; 1716 1717 VecSet(&zero,v); 1718 VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n); 1719 if (n != a->m) SETERRQ(1,0,"Nonconforming matrix and vector"); 1720 for ( i=0; i<ambs; i++ ) { 1721 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1722 if (aj[j] == i) { 1723 row = i*bs; 1724 aa_j = aa+j*bs2; 1725 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 1726 break; 1727 } 1728 } 1729 } 1730 return 0; 1731 } 1732 1733 #undef __FUNC__ 1734 #define __FUNC__ "MatDiagonalScale_SeqBAIJ" 1735 int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 1736 { 1737 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1738 Scalar *l,*r,x,*v,*aa,*li,*ri; 1739 int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 1740 1741 ai = a->i; 1742 aj = a->j; 1743 aa = a->a; 1744 m = a->m; 1745 n = a->n; 1746 bs = a->bs; 1747 mbs = a->mbs; 1748 bs2 = a->bs2; 1749 if (ll) { 1750 VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm); 1751 if (lm != m) SETERRQ(1,0,"Left scaling vector wrong length"); 1752 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1753 M = ai[i+1] - ai[i]; 1754 li = l + i*bs; 1755 v = aa + bs2*ai[i]; 1756 for ( j=0; j<M; j++ ) { /* for each block */ 1757 for ( k=0; k<bs2; k++ ) { 1758 (*v++) *= li[k%bs]; 1759 } 1760 } 1761 } 1762 } 1763 1764 if (rr) { 1765 VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn); 1766 if (rn != n) SETERRQ(1,0,"Right scaling vector wrong length"); 1767 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1768 M = ai[i+1] - ai[i]; 1769 v = aa + bs2*ai[i]; 1770 for ( j=0; j<M; j++ ) { /* for each block */ 1771 ri = r + bs*aj[ai[i]+j]; 1772 for ( k=0; k<bs; k++ ) { 1773 x = ri[k]; 1774 for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 1775 } 1776 } 1777 } 1778 } 1779 return 0; 1780 } 1781 1782 1783 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 1784 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 1785 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1786 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*); 1787 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**); 1788 1789 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 1790 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 1791 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 1792 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 1793 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 1794 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 1795 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec); 1796 1797 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 1798 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 1799 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 1800 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 1801 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 1802 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 1803 1804 #undef __FUNC__ 1805 #define __FUNC__ "MatNorm_SeqBAIJ" 1806 int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 1807 { 1808 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1809 Scalar *v = a->a; 1810 double sum = 0.0; 1811 int i,nz=a->nz,bs2=a->bs2; 1812 1813 if (type == NORM_FROBENIUS) { 1814 for (i=0; i< bs2*nz; i++ ) { 1815 #if defined(PETSC_COMPLEX) 1816 sum += real(conj(*v)*(*v)); v++; 1817 #else 1818 sum += (*v)*(*v); v++; 1819 #endif 1820 } 1821 *norm = sqrt(sum); 1822 } 1823 else { 1824 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 1825 } 1826 return 0; 1827 } 1828 1829 /* 1830 note: This can only work for identity for row and col. It would 1831 be good to check this and otherwise generate an error. 1832 */ 1833 #undef __FUNC__ 1834 #define __FUNC__ "MatILUFactor_SeqBAIJ" 1835 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 1836 { 1837 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1838 Mat outA; 1839 int ierr; 1840 1841 if (fill != 0) SETERRQ(1,0,"Only fill=0 supported"); 1842 1843 outA = inA; 1844 inA->factor = FACTOR_LU; 1845 a->row = row; 1846 a->col = col; 1847 1848 if (!a->solve_work) { 1849 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1850 PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar)); 1851 } 1852 1853 if (!a->diag) { 1854 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 1855 } 1856 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 1857 return 0; 1858 } 1859 1860 #undef __FUNC__ 1861 #define __FUNC__ "MatScale_SeqBAIJ" 1862 int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 1863 { 1864 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1865 int one = 1, totalnz = a->bs2*a->nz; 1866 BLscal_( &totalnz, alpha, a->a, &one ); 1867 PLogFlops(totalnz); 1868 return 0; 1869 } 1870 1871 #undef __FUNC__ 1872 #define __FUNC__ "MatGetValues_SeqBAIJ" 1873 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 1874 { 1875 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1876 int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 1877 int *ai = a->i, *ailen = a->ilen; 1878 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 1879 Scalar *ap, *aa = a->a, zero = 0.0; 1880 1881 for ( k=0; k<m; k++ ) { /* loop over rows */ 1882 row = im[k]; brow = row/bs; 1883 if (row < 0) SETERRQ(1,0,"Negative row"); 1884 if (row >= a->m) SETERRQ(1,0,"Row too large"); 1885 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1886 nrow = ailen[brow]; 1887 for ( l=0; l<n; l++ ) { /* loop over columns */ 1888 if (in[l] < 0) SETERRQ(1,0,"Negative column"); 1889 if (in[l] >= a->n) SETERRQ(1,0,"Column too large"); 1890 col = in[l] ; 1891 bcol = col/bs; 1892 cidx = col%bs; 1893 ridx = row%bs; 1894 high = nrow; 1895 low = 0; /* assume unsorted */ 1896 while (high-low > 5) { 1897 t = (low+high)/2; 1898 if (rp[t] > bcol) high = t; 1899 else low = t; 1900 } 1901 for ( i=low; i<high; i++ ) { 1902 if (rp[i] > bcol) break; 1903 if (rp[i] == bcol) { 1904 *v++ = ap[bs2*i+bs*cidx+ridx]; 1905 goto finished; 1906 } 1907 } 1908 *v++ = zero; 1909 finished:; 1910 } 1911 } 1912 return 0; 1913 } 1914 1915 #undef __FUNC__ 1916 #define __FUNC__ "MatGetBlockSize_SeqBAIJ" /* ADIC Ignore */ 1917 int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs) 1918 { 1919 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data; 1920 *bs = baij->bs; 1921 return 0; 1922 } 1923 1924 /* idx should be of length atlease bs */ 1925 #undef __FUNC__ 1926 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Block" 1927 static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg) 1928 { 1929 int i,row; 1930 row = idx[0]; 1931 if (row%bs!=0) { *flg = PETSC_FALSE; return 0; } 1932 1933 for ( i=1; i<bs; i++ ) { 1934 if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; } 1935 } 1936 *flg = PETSC_TRUE; 1937 return 0; 1938 } 1939 1940 #undef __FUNC__ 1941 #define __FUNC__ "MatZeroRows_SeqBAIJ" 1942 int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag) 1943 { 1944 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1945 IS is_local; 1946 int ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2; 1947 PetscTruth flg; 1948 Scalar zero = 0.0,*aa; 1949 1950 /* Make a copy of the IS and sort it */ 1951 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 1952 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1953 ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr); 1954 ierr = ISSort(is_local); CHKERRQ(ierr); 1955 ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr); 1956 1957 i = 0; 1958 while (i < is_n) { 1959 if (rows[i]<0 || rows[i]>m) SETERRQ(1,0,"row out of range"); 1960 flg = PETSC_FALSE; 1961 if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); } 1962 if (flg) { /* There exists a block of rows to be Zerowed */ 1963 baij->ilen[rows[i]/bs] = 0; 1964 i += bs; 1965 } else { /* Zero out only the requested row */ 1966 count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs; 1967 aa = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs); 1968 for ( j=0; j<count; j++ ) { 1969 aa[0] = zero; 1970 aa+=bs; 1971 } 1972 i++; 1973 } 1974 } 1975 if (diag) { 1976 for ( j=0; j<is_n; j++ ) { 1977 ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1978 } 1979 } 1980 ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr); 1981 ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr); 1982 ierr = ISDestroy(is_local); CHKERRQ(ierr); 1983 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 1984 1985 return 0; 1986 } 1987 1988 #undef __FUNC__ 1989 #define __FUNC__ "MatPrintHelp_SeqBAIJ" /* ADIC Ignore */ 1990 int MatPrintHelp_SeqBAIJ(Mat A) 1991 { 1992 static int called = 0; 1993 MPI_Comm comm = A->comm; 1994 1995 if (called) return 0; else called = 1; 1996 PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n"); 1997 PetscPrintf(comm," -mat_block_size <block_size>\n"); 1998 return 0; 1999 } 2000 2001 /* -------------------------------------------------------------------*/ 2002 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 2003 MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 2004 MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N, 2005 MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ, 2006 MatSolve_SeqBAIJ_N,0, 2007 0,0, 2008 MatLUFactor_SeqBAIJ,0, 2009 0, 2010 MatTranspose_SeqBAIJ, 2011 MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 2012 MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 2013 0,MatAssemblyEnd_SeqBAIJ, 2014 0, 2015 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ, 2016 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 2017 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 2018 MatILUFactorSymbolic_SeqBAIJ,0, 2019 0,0, 2020 MatConvertSameType_SeqBAIJ,0,0, 2021 MatILUFactor_SeqBAIJ,0,0, 2022 MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ, 2023 MatGetValues_SeqBAIJ,0, 2024 MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 2025 0,0,0,MatGetBlockSize_SeqBAIJ, 2026 MatGetRowIJ_SeqBAIJ, 2027 MatRestoreRowIJ_SeqBAIJ, 2028 0,0,0,0,0,0, 2029 MatSetValuesBlocked_SeqBAIJ}; 2030 2031 #undef __FUNC__ 2032 #define __FUNC__ "MatCreateSeqBAIJ" 2033 /*@C 2034 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2035 compressed row) format. For good matrix assembly performance the 2036 user should preallocate the matrix storage by setting the parameter nz 2037 (or the array nzz). By setting these parameters accurately, performance 2038 during matrix assembly can be increased by more than a factor of 50. 2039 2040 Input Parameters: 2041 . comm - MPI communicator, set to PETSC_COMM_SELF 2042 . bs - size of block 2043 . m - number of rows 2044 . n - number of columns 2045 . nz - number of block nonzeros per block row (same for all rows) 2046 . nzz - array containing the number of block nonzeros in the various block rows 2047 (possibly different for each block row) or PETSC_NULL 2048 2049 Output Parameter: 2050 . A - the matrix 2051 2052 Options Database Keys: 2053 $ -mat_no_unroll - uses code that does not unroll the loops in the 2054 $ block calculations (much solver) 2055 $ -mat_block_size - size of the blocks to use 2056 2057 Notes: 2058 The block AIJ format is fully compatible with standard Fortran 77 2059 storage. That is, the stored row and column indices can begin at 2060 either one (as in Fortran) or zero. See the users' manual for details. 2061 2062 Specify the preallocated storage with either nz or nnz (not both). 2063 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2064 allocation. For additional details, see the users manual chapter on 2065 matrices. 2066 2067 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2068 @*/ 2069 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 2070 { 2071 Mat B; 2072 Mat_SeqBAIJ *b; 2073 int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size; 2074 2075 MPI_Comm_size(comm,&size); 2076 if (size > 1) SETERRQ(1,0,"Comm must be of size 1"); 2077 2078 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr); 2079 2080 if (mbs*bs!=m || nbs*bs!=n) 2081 SETERRQ(1,0,"Number rows, cols must be divisible by blocksize"); 2082 2083 *A = 0; 2084 PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATSEQBAIJ,comm); 2085 PLogObjectCreate(B); 2086 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 2087 PetscMemzero(b,sizeof(Mat_SeqBAIJ)); 2088 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 2089 ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr); 2090 if (!flg) { 2091 switch (bs) { 2092 case 1: 2093 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2094 B->ops.solve = MatSolve_SeqBAIJ_1; 2095 B->ops.mult = MatMult_SeqBAIJ_1; 2096 B->ops.multadd = MatMultAdd_SeqBAIJ_1; 2097 break; 2098 case 2: 2099 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2100 B->ops.solve = MatSolve_SeqBAIJ_2; 2101 B->ops.mult = MatMult_SeqBAIJ_2; 2102 B->ops.multadd = MatMultAdd_SeqBAIJ_2; 2103 break; 2104 case 3: 2105 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2106 B->ops.solve = MatSolve_SeqBAIJ_3; 2107 B->ops.mult = MatMult_SeqBAIJ_3; 2108 B->ops.multadd = MatMultAdd_SeqBAIJ_3; 2109 break; 2110 case 4: 2111 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2112 B->ops.solve = MatSolve_SeqBAIJ_4; 2113 B->ops.mult = MatMult_SeqBAIJ_4; 2114 B->ops.multadd = MatMultAdd_SeqBAIJ_4; 2115 break; 2116 case 5: 2117 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2118 B->ops.solve = MatSolve_SeqBAIJ_5; 2119 B->ops.mult = MatMult_SeqBAIJ_5; 2120 B->ops.multadd = MatMultAdd_SeqBAIJ_5; 2121 break; 2122 case 7: 2123 B->ops.mult = MatMult_SeqBAIJ_7; 2124 B->ops.solve = MatSolve_SeqBAIJ_7; 2125 B->ops.multadd = MatMultAdd_SeqBAIJ_7; 2126 break; 2127 } 2128 } 2129 B->destroy = MatDestroy_SeqBAIJ; 2130 B->view = MatView_SeqBAIJ; 2131 B->factor = 0; 2132 B->lupivotthreshold = 1.0; 2133 B->mapping = 0; 2134 b->row = 0; 2135 b->col = 0; 2136 b->reallocs = 0; 2137 2138 b->m = m; B->m = m; B->M = m; 2139 b->n = n; B->n = n; B->N = n; 2140 b->mbs = mbs; 2141 b->nbs = nbs; 2142 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 2143 if (nnz == PETSC_NULL) { 2144 if (nz == PETSC_DEFAULT) nz = 5; 2145 else if (nz <= 0) nz = 1; 2146 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 2147 nz = nz*mbs; 2148 } 2149 else { 2150 nz = 0; 2151 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 2152 } 2153 2154 /* allocate the matrix space */ 2155 len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 2156 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 2157 PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 2158 b->j = (int *) (b->a + nz*bs2); 2159 PetscMemzero(b->j,nz*sizeof(int)); 2160 b->i = b->j + nz; 2161 b->singlemalloc = 1; 2162 2163 b->i[0] = 0; 2164 for (i=1; i<mbs+1; i++) { 2165 b->i[i] = b->i[i-1] + b->imax[i-1]; 2166 } 2167 2168 /* b->ilen will count nonzeros in each block row so far. */ 2169 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 2170 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2171 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 2172 2173 b->bs = bs; 2174 b->bs2 = bs2; 2175 b->mbs = mbs; 2176 b->nz = 0; 2177 b->maxnz = nz*bs2; 2178 b->sorted = 0; 2179 b->roworiented = 1; 2180 b->nonew = 0; 2181 b->diag = 0; 2182 b->solve_work = 0; 2183 b->mult_work = 0; 2184 b->spptr = 0; 2185 B->info.nz_unneeded = (double)b->maxnz; 2186 2187 *A = B; 2188 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 2189 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 2190 return 0; 2191 } 2192 2193 #undef __FUNC__ 2194 #define __FUNC__ "MatConvertSameType_SeqBAIJ" 2195 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 2196 { 2197 Mat C; 2198 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 2199 int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 2200 2201 if (a->i[mbs] != nz) SETERRQ(1,0,"Corrupt matrix"); 2202 2203 *B = 0; 2204 PetscHeaderCreate(C,_p_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm); 2205 PLogObjectCreate(C); 2206 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 2207 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 2208 C->destroy = MatDestroy_SeqBAIJ; 2209 C->view = MatView_SeqBAIJ; 2210 C->factor = A->factor; 2211 c->row = 0; 2212 c->col = 0; 2213 C->assembled = PETSC_TRUE; 2214 2215 c->m = C->m = a->m; 2216 c->n = C->n = a->n; 2217 C->M = a->m; 2218 C->N = a->n; 2219 2220 c->bs = a->bs; 2221 c->bs2 = a->bs2; 2222 c->mbs = a->mbs; 2223 c->nbs = a->nbs; 2224 2225 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 2226 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 2227 for ( i=0; i<mbs; i++ ) { 2228 c->imax[i] = a->imax[i]; 2229 c->ilen[i] = a->ilen[i]; 2230 } 2231 2232 /* allocate the matrix space */ 2233 c->singlemalloc = 1; 2234 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 2235 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 2236 c->j = (int *) (c->a + nz*bs2); 2237 c->i = c->j + nz; 2238 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 2239 if (mbs > 0) { 2240 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 2241 if (cpvalues == COPY_VALUES) { 2242 PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 2243 } 2244 } 2245 2246 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2247 c->sorted = a->sorted; 2248 c->roworiented = a->roworiented; 2249 c->nonew = a->nonew; 2250 2251 if (a->diag) { 2252 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 2253 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 2254 for ( i=0; i<mbs; i++ ) { 2255 c->diag[i] = a->diag[i]; 2256 } 2257 } 2258 else c->diag = 0; 2259 c->nz = a->nz; 2260 c->maxnz = a->maxnz; 2261 c->solve_work = 0; 2262 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 2263 c->mult_work = 0; 2264 *B = C; 2265 return 0; 2266 } 2267 2268 #undef __FUNC__ 2269 #define __FUNC__ "MatLoad_SeqBAIJ" 2270 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 2271 { 2272 Mat_SeqBAIJ *a; 2273 Mat B; 2274 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 2275 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2276 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 2277 int *masked, nmask,tmp,bs2,ishift; 2278 Scalar *aa; 2279 MPI_Comm comm = ((PetscObject) viewer)->comm; 2280 2281 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2282 bs2 = bs*bs; 2283 2284 MPI_Comm_size(comm,&size); 2285 if (size > 1) SETERRQ(1,0,"view must have one processor"); 2286 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2287 ierr = PetscBinaryRead(fd,header,4,BINARY_INT); CHKERRQ(ierr); 2288 if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not Mat object"); 2289 M = header[1]; N = header[2]; nz = header[3]; 2290 2291 if (M != N) SETERRQ(1,0,"Can only do square matrices"); 2292 2293 /* 2294 This code adds extra rows to make sure the number of rows is 2295 divisible by the blocksize 2296 */ 2297 mbs = M/bs; 2298 extra_rows = bs - M + bs*(mbs); 2299 if (extra_rows == bs) extra_rows = 0; 2300 else mbs++; 2301 if (extra_rows) { 2302 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 2303 } 2304 2305 /* read in row lengths */ 2306 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 2307 ierr = PetscBinaryRead(fd,rowlengths,M,BINARY_INT); CHKERRQ(ierr); 2308 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 2309 2310 /* read in column indices */ 2311 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 2312 ierr = PetscBinaryRead(fd,jj,nz,BINARY_INT); CHKERRQ(ierr); 2313 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 2314 2315 /* loop over row lengths determining block row lengths */ 2316 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 2317 PetscMemzero(browlengths,mbs*sizeof(int)); 2318 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 2319 PetscMemzero(mask,mbs*sizeof(int)); 2320 masked = mask + mbs; 2321 rowcount = 0; nzcount = 0; 2322 for ( i=0; i<mbs; i++ ) { 2323 nmask = 0; 2324 for ( j=0; j<bs; j++ ) { 2325 kmax = rowlengths[rowcount]; 2326 for ( k=0; k<kmax; k++ ) { 2327 tmp = jj[nzcount++]/bs; 2328 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2329 } 2330 rowcount++; 2331 } 2332 browlengths[i] += nmask; 2333 /* zero out the mask elements we set */ 2334 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2335 } 2336 2337 /* create our matrix */ 2338 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A); 2339 CHKERRQ(ierr); 2340 B = *A; 2341 a = (Mat_SeqBAIJ *) B->data; 2342 2343 /* set matrix "i" values */ 2344 a->i[0] = 0; 2345 for ( i=1; i<= mbs; i++ ) { 2346 a->i[i] = a->i[i-1] + browlengths[i-1]; 2347 a->ilen[i-1] = browlengths[i-1]; 2348 } 2349 a->nz = 0; 2350 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 2351 2352 /* read in nonzero values */ 2353 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 2354 ierr = PetscBinaryRead(fd,aa,nz,BINARY_SCALAR); CHKERRQ(ierr); 2355 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 2356 2357 /* set "a" and "j" values into matrix */ 2358 nzcount = 0; jcount = 0; 2359 for ( i=0; i<mbs; i++ ) { 2360 nzcountb = nzcount; 2361 nmask = 0; 2362 for ( j=0; j<bs; j++ ) { 2363 kmax = rowlengths[i*bs+j]; 2364 for ( k=0; k<kmax; k++ ) { 2365 tmp = jj[nzcount++]/bs; 2366 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2367 } 2368 rowcount++; 2369 } 2370 /* sort the masked values */ 2371 PetscSortInt(nmask,masked); 2372 2373 /* set "j" values into matrix */ 2374 maskcount = 1; 2375 for ( j=0; j<nmask; j++ ) { 2376 a->j[jcount++] = masked[j]; 2377 mask[masked[j]] = maskcount++; 2378 } 2379 /* set "a" values into matrix */ 2380 ishift = bs2*a->i[i]; 2381 for ( j=0; j<bs; j++ ) { 2382 kmax = rowlengths[i*bs+j]; 2383 for ( k=0; k<kmax; k++ ) { 2384 tmp = jj[nzcountb]/bs ; 2385 block = mask[tmp] - 1; 2386 point = jj[nzcountb] - bs*tmp; 2387 idx = ishift + bs2*block + j + bs*point; 2388 a->a[idx] = aa[nzcountb++]; 2389 } 2390 } 2391 /* zero out the mask elements we set */ 2392 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2393 } 2394 if (jcount != a->nz) SETERRQ(1,0,"Error bad binary matrix"); 2395 2396 PetscFree(rowlengths); 2397 PetscFree(browlengths); 2398 PetscFree(aa); 2399 PetscFree(jj); 2400 PetscFree(mask); 2401 2402 B->assembled = PETSC_TRUE; 2403 2404 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 2405 if (flg) { 2406 Viewer tviewer; 2407 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2408 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO,0);CHKERRQ(ierr); 2409 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2410 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2411 } 2412 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 2413 if (flg) { 2414 Viewer tviewer; 2415 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2416 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO_LONG,0);CHKERRQ(ierr); 2417 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2418 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2419 } 2420 ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 2421 if (flg) { 2422 Viewer tviewer; 2423 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2424 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2425 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2426 } 2427 ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 2428 if (flg) { 2429 Viewer tviewer; 2430 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2431 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_MATLAB,"M");CHKERRQ(ierr); 2432 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2433 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2434 } 2435 ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 2436 if (flg) { 2437 Viewer tviewer; 2438 ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 2439 ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 2440 ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 2441 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2442 } 2443 return 0; 2444 } 2445 2446 2447 2448