1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: baij.c,v 1.111 1997/08/29 17:15:56 curfman Exp bsmith $"; 3 #endif 4 5 /* 6 Defines the basic matrix operations for the BAIJ (compressed row) 7 matrix storage format. 8 */ 9 10 #include "pinclude/pviewer.h" 11 #include "sys.h" 12 #include "src/mat/impls/baij/seq/baij.h" 13 #include "src/vec/vecimpl.h" 14 #include "src/inline/spops.h" 15 #include "petsc.h" 16 17 18 /* 19 Adds diagonal pointers to sparse matrix structure. 20 */ 21 22 #undef __FUNC__ 23 #define __FUNC__ "MatMarkDiag_SeqBAIJ" 24 int MatMarkDiag_SeqBAIJ(Mat A) 25 { 26 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 27 int i,j, *diag, m = a->mbs; 28 29 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 30 PLogObjectMemory(A,(m+1)*sizeof(int)); 31 for ( i=0; i<m; i++ ) { 32 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 33 if (a->j[j] == i) { 34 diag[i] = j; 35 break; 36 } 37 } 38 } 39 a->diag = diag; 40 return 0; 41 } 42 43 44 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 45 46 #undef __FUNC__ 47 #define __FUNC__ "MatGetRowIJ_SeqBAIJ" 48 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 49 PetscTruth *done) 50 { 51 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 52 int ierr, n = a->mbs,i; 53 54 *nn = n; 55 if (!ia) return 0; 56 if (symmetric) { 57 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 58 } else if (oshift == 1) { 59 /* temporarily add 1 to i and j indices */ 60 int nz = a->i[n] + 1; 61 for ( i=0; i<nz; i++ ) a->j[i]++; 62 for ( i=0; i<n+1; i++ ) a->i[i]++; 63 *ia = a->i; *ja = a->j; 64 } else { 65 *ia = a->i; *ja = a->j; 66 } 67 68 return 0; 69 } 70 71 #undef __FUNC__ 72 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ" 73 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 74 PetscTruth *done) 75 { 76 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 77 int i,n = a->mbs; 78 79 if (!ia) return 0; 80 if (symmetric) { 81 PetscFree(*ia); 82 PetscFree(*ja); 83 } else if (oshift == 1) { 84 int nz = a->i[n]; 85 for ( i=0; i<nz; i++ ) a->j[i]--; 86 for ( i=0; i<n+1; i++ ) a->i[i]--; 87 } 88 return 0; 89 } 90 91 92 #undef __FUNC__ 93 #define __FUNC__ "MatView_SeqBAIJ_Binary" 94 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 95 { 96 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 97 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 98 Scalar *aa; 99 100 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 101 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 102 col_lens[0] = MAT_COOKIE; 103 104 col_lens[1] = a->m; 105 col_lens[2] = a->n; 106 col_lens[3] = a->nz*bs2; 107 108 /* store lengths of each row and write (including header) to file */ 109 count = 0; 110 for ( i=0; i<a->mbs; i++ ) { 111 for ( j=0; j<bs; j++ ) { 112 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 113 } 114 } 115 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,PETSC_INT,1); CHKERRQ(ierr); 116 PetscFree(col_lens); 117 118 /* store column indices (zero start index) */ 119 jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) ); CHKPTRQ(jj); 120 count = 0; 121 for ( i=0; i<a->mbs; i++ ) { 122 for ( j=0; j<bs; j++ ) { 123 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 124 for ( l=0; l<bs; l++ ) { 125 jj[count++] = bs*a->j[k] + l; 126 } 127 } 128 } 129 } 130 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0); CHKERRQ(ierr); 131 PetscFree(jj); 132 133 /* store nonzero values */ 134 aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar)); CHKPTRQ(aa); 135 count = 0; 136 for ( i=0; i<a->mbs; i++ ) { 137 for ( j=0; j<bs; j++ ) { 138 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 139 for ( l=0; l<bs; l++ ) { 140 aa[count++] = a->a[bs2*k + l*bs + j]; 141 } 142 } 143 } 144 } 145 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0); CHKERRQ(ierr); 146 PetscFree(aa); 147 return 0; 148 } 149 150 #undef __FUNC__ 151 #define __FUNC__ "MatView_SeqBAIJ_ASCII" 152 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 153 { 154 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 155 int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 156 FILE *fd; 157 char *outputname; 158 159 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 160 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 161 ierr = ViewerGetFormat(viewer,&format); 162 if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) { 163 fprintf(fd," block size is %d\n",bs); 164 } 165 else if (format == VIEWER_FORMAT_ASCII_MATLAB) { 166 SETERRQ(1,0,"Matlab format not supported"); 167 } 168 else if (format == VIEWER_FORMAT_ASCII_COMMON) { 169 for ( i=0; i<a->mbs; i++ ) { 170 for ( j=0; j<bs; j++ ) { 171 fprintf(fd,"row %d:",i*bs+j); 172 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 173 for ( l=0; l<bs; l++ ) { 174 #if defined(PETSC_COMPLEX) 175 if (imag(a->a[bs2*k + l*bs + j]) > 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 176 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 177 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 178 else if (imag(a->a[bs2*k + l*bs + j]) < 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 179 fprintf(fd," %d %g - %g i",bs*a->j[k]+l, 180 real(a->a[bs2*k + l*bs + j]),-imag(a->a[bs2*k + l*bs + j])); 181 else if (real(a->a[bs2*k + l*bs + j]) != 0.0) 182 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 183 #else 184 if (a->a[bs2*k + l*bs + j] != 0.0) 185 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 186 #endif 187 } 188 } 189 fprintf(fd,"\n"); 190 } 191 } 192 } 193 else { 194 for ( i=0; i<a->mbs; i++ ) { 195 for ( j=0; j<bs; j++ ) { 196 fprintf(fd,"row %d:",i*bs+j); 197 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 198 for ( l=0; l<bs; l++ ) { 199 #if defined(PETSC_COMPLEX) 200 if (imag(a->a[bs2*k + l*bs + j]) > 0.0) { 201 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 202 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 203 } 204 else if (imag(a->a[bs2*k + l*bs + j]) < 0.0) { 205 fprintf(fd," %d %g - %g i",bs*a->j[k]+l, 206 real(a->a[bs2*k + l*bs + j]),-imag(a->a[bs2*k + l*bs + j])); 207 } 208 else { 209 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 210 } 211 #else 212 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 213 #endif 214 } 215 } 216 fprintf(fd,"\n"); 217 } 218 } 219 } 220 fflush(fd); 221 return 0; 222 } 223 224 #undef __FUNC__ 225 #define __FUNC__ "MatView_SeqBAIJ_Draw" 226 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer) 227 { 228 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 229 int row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2; 230 double xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r; 231 Scalar *aa; 232 Draw draw; 233 DrawButton button; 234 PetscTruth isnull; 235 236 ViewerDrawGetDraw(viewer,&draw); 237 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) return 0; 238 239 xr = a->n; yr = a->m; h = yr/10.0; w = xr/10.0; 240 xr += w; yr += h; xl = -w; yl = -h; 241 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 242 /* loop over matrix elements drawing boxes */ 243 color = DRAW_BLUE; 244 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 245 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 246 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 247 x_l = a->j[j]*bs; x_r = x_l + 1.0; 248 aa = a->a + j*bs2; 249 for ( k=0; k<bs; k++ ) { 250 for ( l=0; l<bs; l++ ) { 251 if (PetscReal(*aa++) >= 0.) continue; 252 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 253 } 254 } 255 } 256 } 257 color = DRAW_CYAN; 258 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 259 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 260 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 261 x_l = a->j[j]*bs; x_r = x_l + 1.0; 262 aa = a->a + j*bs2; 263 for ( k=0; k<bs; k++ ) { 264 for ( l=0; l<bs; l++ ) { 265 if (PetscReal(*aa++) != 0.) continue; 266 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 267 } 268 } 269 } 270 } 271 272 color = DRAW_RED; 273 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 274 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 275 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 276 x_l = a->j[j]*bs; x_r = x_l + 1.0; 277 aa = a->a + j*bs2; 278 for ( k=0; k<bs; k++ ) { 279 for ( l=0; l<bs; l++ ) { 280 if (PetscReal(*aa++) <= 0.) continue; 281 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 282 } 283 } 284 } 285 } 286 287 DrawFlush(draw); 288 DrawGetPause(draw,&pause); 289 if (pause >= 0) { PetscSleep(pause); return 0;} 290 291 /* allow the matrix to zoom or shrink */ 292 ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 293 while (button != BUTTON_RIGHT) { 294 DrawClear(draw); 295 if (button == BUTTON_LEFT) scale = .5; 296 else if (button == BUTTON_CENTER) scale = 2.; 297 xl = scale*(xl + w - xc) + xc - w*scale; 298 xr = scale*(xr - w - xc) + xc + w*scale; 299 yl = scale*(yl + h - yc) + yc - h*scale; 300 yr = scale*(yr - h - yc) + yc + h*scale; 301 w *= scale; h *= scale; 302 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 303 color = DRAW_BLUE; 304 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 305 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 306 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 307 x_l = a->j[j]*bs; x_r = x_l + 1.0; 308 aa = a->a + j*bs2; 309 for ( k=0; k<bs; k++ ) { 310 for ( l=0; l<bs; l++ ) { 311 if (PetscReal(*aa++) >= 0.) continue; 312 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 313 } 314 } 315 } 316 } 317 color = DRAW_CYAN; 318 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 319 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 320 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 321 x_l = a->j[j]*bs; x_r = x_l + 1.0; 322 aa = a->a + j*bs2; 323 for ( k=0; k<bs; k++ ) { 324 for ( l=0; l<bs; l++ ) { 325 if (PetscReal(*aa++) != 0.) continue; 326 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 327 } 328 } 329 } 330 } 331 332 color = DRAW_RED; 333 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 334 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 335 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 336 x_l = a->j[j]*bs; x_r = x_l + 1.0; 337 aa = a->a + j*bs2; 338 for ( k=0; k<bs; k++ ) { 339 for ( l=0; l<bs; l++ ) { 340 if (PetscReal(*aa++) <= 0.) continue; 341 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 342 } 343 } 344 } 345 } 346 ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 347 } 348 return 0; 349 } 350 351 #undef __FUNC__ 352 #define __FUNC__ "MatView_SeqBAIJ" 353 int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 354 { 355 Mat A = (Mat) obj; 356 ViewerType vtype; 357 int ierr; 358 359 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 360 if (vtype == MATLAB_VIEWER) { 361 SETERRQ(1,0,"Matlab viewer not supported"); 362 } 363 else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 364 return MatView_SeqBAIJ_ASCII(A,viewer); 365 } 366 else if (vtype == BINARY_FILE_VIEWER) { 367 return MatView_SeqBAIJ_Binary(A,viewer); 368 } 369 else if (vtype == DRAW_VIEWER) { 370 return MatView_SeqBAIJ_Draw(A,viewer); 371 } 372 return 0; 373 } 374 375 #define CHUNKSIZE 10 376 377 #undef __FUNC__ 378 #define __FUNC__ "MatSetValues_SeqBAIJ" 379 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 380 { 381 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 382 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 383 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 384 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 385 int ridx,cidx,bs2=a->bs2; 386 Scalar *ap,value,*aa=a->a,*bap; 387 388 for ( k=0; k<m; k++ ) { /* loop over added rows */ 389 row = im[k]; brow = row/bs; 390 #if defined(PETSC_BOPT_g) 391 if (row < 0) SETERRQ(1,0,"Negative row"); 392 if (row >= a->m) SETERRQ(1,0,"Row too large"); 393 #endif 394 rp = aj + ai[brow]; 395 ap = aa + bs2*ai[brow]; 396 rmax = imax[brow]; 397 nrow = ailen[brow]; 398 low = 0; 399 for ( l=0; l<n; l++ ) { /* loop over added columns */ 400 #if defined(PETSC_BOPT_g) 401 if (in[l] < 0) SETERRQ(1,0,"Negative column"); 402 if (in[l] >= a->n) SETERRQ(1,0,"Column too large"); 403 #endif 404 col = in[l]; bcol = col/bs; 405 ridx = row % bs; cidx = col % bs; 406 if (roworiented) { 407 value = *v++; 408 } 409 else { 410 value = v[k + l*m]; 411 } 412 if (!sorted) low = 0; high = nrow; 413 while (high-low > 7) { 414 t = (low+high)/2; 415 if (rp[t] > bcol) high = t; 416 else low = t; 417 } 418 for ( i=low; i<high; i++ ) { 419 if (rp[i] > bcol) break; 420 if (rp[i] == bcol) { 421 bap = ap + bs2*i + bs*cidx + ridx; 422 if (is == ADD_VALUES) *bap += value; 423 else *bap = value; 424 goto noinsert1; 425 } 426 } 427 if (nonew == 1) goto noinsert1; 428 else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 429 if (nrow >= rmax) { 430 /* there is no extra room in row, therefore enlarge */ 431 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 432 Scalar *new_a; 433 434 if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 435 436 /* Malloc new storage space */ 437 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 438 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 439 new_j = (int *) (new_a + bs2*new_nz); 440 new_i = new_j + new_nz; 441 442 /* copy over old data into new slots */ 443 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 444 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 445 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); 446 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 447 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, 448 len*sizeof(int)); 449 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); 450 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 451 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), 452 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); 453 /* free up old matrix storage */ 454 PetscFree(a->a); 455 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 456 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 457 a->singlemalloc = 1; 458 459 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 460 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 461 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 462 a->maxnz += bs2*CHUNKSIZE; 463 a->reallocs++; 464 a->nz++; 465 } 466 N = nrow++ - 1; 467 /* shift up all the later entries in this row */ 468 for ( ii=N; ii>=i; ii-- ) { 469 rp[ii+1] = rp[ii]; 470 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 471 } 472 if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 473 rp[i] = bcol; 474 ap[bs2*i + bs*cidx + ridx] = value; 475 noinsert1:; 476 low = i; 477 } 478 ailen[brow] = nrow; 479 } 480 return 0; 481 } 482 483 #undef __FUNC__ 484 #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ" 485 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 486 { 487 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 488 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 489 int *imax=a->imax,*ai=a->i,*ailen=a->ilen, roworiented=a->roworiented; 490 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval; 491 Scalar *ap,*value=v,*aa=a->a,*bap; 492 493 if (roworiented) { 494 stepval = (n-1)*bs; 495 } else { 496 stepval = (m-1)*bs; 497 } 498 for ( k=0; k<m; k++ ) { /* loop over added rows */ 499 row = im[k]; 500 #if defined(PETSC_BOPT_g) 501 if (row < 0) SETERRQ(1,0,"Negative row"); 502 if (row >= a->mbs) SETERRQ(1,0,"Row too large"); 503 #endif 504 rp = aj + ai[row]; 505 ap = aa + bs2*ai[row]; 506 rmax = imax[row]; 507 nrow = ailen[row]; 508 low = 0; 509 for ( l=0; l<n; l++ ) { /* loop over added columns */ 510 #if defined(PETSC_BOPT_g) 511 if (in[l] < 0) SETERRQ(1,0,"Negative column"); 512 if (in[l] >= a->nbs) SETERRQ(1,0,"Column too large"); 513 #endif 514 col = in[l]; 515 if (roworiented) { 516 value = v + k*(stepval+bs)*bs + l*bs; 517 } else { 518 value = v + l*(stepval+bs)*bs + k*bs; 519 } 520 if (!sorted) low = 0; high = nrow; 521 while (high-low > 7) { 522 t = (low+high)/2; 523 if (rp[t] > col) high = t; 524 else low = t; 525 } 526 for ( i=low; i<high; i++ ) { 527 if (rp[i] > col) break; 528 if (rp[i] == col) { 529 bap = ap + bs2*i; 530 if (roworiented) { 531 if (is == ADD_VALUES) { 532 for ( ii=0; ii<bs; ii++,value+=stepval ) { 533 for (jj=ii; jj<bs2; jj+=bs ) { 534 bap[jj] += *value++; 535 } 536 } 537 } else { 538 for ( ii=0; ii<bs; ii++,value+=stepval ) { 539 for (jj=ii; jj<bs2; jj+=bs ) { 540 bap[jj] = *value++; 541 } 542 } 543 } 544 } else { 545 if (is == ADD_VALUES) { 546 for ( ii=0; ii<bs; ii++,value+=stepval ) { 547 for (jj=0; jj<bs; jj++ ) { 548 *bap++ += *value++; 549 } 550 } 551 } else { 552 for ( ii=0; ii<bs; ii++,value+=stepval ) { 553 for (jj=0; jj<bs; jj++ ) { 554 *bap++ = *value++; 555 } 556 } 557 } 558 } 559 goto noinsert2; 560 } 561 } 562 if (nonew == 1) goto noinsert2; 563 else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 564 if (nrow >= rmax) { 565 /* there is no extra room in row, therefore enlarge */ 566 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 567 Scalar *new_a; 568 569 if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 570 571 /* malloc new storage space */ 572 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 573 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 574 new_j = (int *) (new_a + bs2*new_nz); 575 new_i = new_j + new_nz; 576 577 /* copy over old data into new slots */ 578 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} 579 for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 580 PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int)); 581 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 582 PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int)); 583 PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(Scalar)); 584 PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 585 PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(Scalar)); 586 /* free up old matrix storage */ 587 PetscFree(a->a); 588 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 589 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 590 a->singlemalloc = 1; 591 592 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 593 rmax = imax[row] = imax[row] + CHUNKSIZE; 594 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 595 a->maxnz += bs2*CHUNKSIZE; 596 a->reallocs++; 597 a->nz++; 598 } 599 N = nrow++ - 1; 600 /* shift up all the later entries in this row */ 601 for ( ii=N; ii>=i; ii-- ) { 602 rp[ii+1] = rp[ii]; 603 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 604 } 605 if (N >= i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 606 rp[i] = col; 607 bap = ap + bs2*i; 608 if (roworiented) { 609 for ( ii=0; ii<bs; ii++,value+=stepval) { 610 for (jj=ii; jj<bs2; jj+=bs ) { 611 bap[jj] = *value++; 612 } 613 } 614 } else { 615 for ( ii=0; ii<bs; ii++,value+=stepval ) { 616 for (jj=0; jj<bs; jj++ ) { 617 *bap++ = *value++; 618 } 619 } 620 } 621 noinsert2:; 622 low = i; 623 } 624 ailen[row] = nrow; 625 } 626 return 0; 627 } 628 629 #undef __FUNC__ 630 #define __FUNC__ "MatGetSize_SeqBAIJ" 631 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 632 { 633 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 634 if (m) *m = a->m; 635 if (n) *n = a->n; 636 return 0; 637 } 638 639 #undef __FUNC__ 640 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ" 641 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 642 { 643 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 644 *m = 0; *n = a->m; 645 return 0; 646 } 647 648 #undef __FUNC__ 649 #define __FUNC__ "MatGetRow_SeqBAIJ" 650 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 651 { 652 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 653 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 654 Scalar *aa,*v_i,*aa_i; 655 656 bs = a->bs; 657 ai = a->i; 658 aj = a->j; 659 aa = a->a; 660 bs2 = a->bs2; 661 662 if (row < 0 || row >= a->m) SETERRQ(1,0,"Row out of range"); 663 664 bn = row/bs; /* Block number */ 665 bp = row % bs; /* Block Position */ 666 M = ai[bn+1] - ai[bn]; 667 *nz = bs*M; 668 669 if (v) { 670 *v = 0; 671 if (*nz) { 672 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 673 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 674 v_i = *v + i*bs; 675 aa_i = aa + bs2*(ai[bn] + i); 676 for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 677 } 678 } 679 } 680 681 if (idx) { 682 *idx = 0; 683 if (*nz) { 684 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 685 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 686 idx_i = *idx + i*bs; 687 itmp = bs*aj[ai[bn] + i]; 688 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 689 } 690 } 691 } 692 return 0; 693 } 694 695 #undef __FUNC__ 696 #define __FUNC__ "MatRestoreRow_SeqBAIJ" 697 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 698 { 699 if (idx) {if (*idx) PetscFree(*idx);} 700 if (v) {if (*v) PetscFree(*v);} 701 return 0; 702 } 703 704 #undef __FUNC__ 705 #define __FUNC__ "MatTranspose_SeqBAIJ" 706 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 707 { 708 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 709 Mat C; 710 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 711 int *rows,*cols,bs2=a->bs2; 712 Scalar *array=a->a; 713 714 if (B==PETSC_NULL && mbs!=nbs) 715 SETERRQ(1,0,"Square matrix only for in-place"); 716 col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 717 PetscMemzero(col,(1+nbs)*sizeof(int)); 718 719 for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 720 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 721 PetscFree(col); 722 rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 723 cols = rows + bs; 724 for ( i=0; i<mbs; i++ ) { 725 cols[0] = i*bs; 726 for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 727 len = ai[i+1] - ai[i]; 728 for ( j=0; j<len; j++ ) { 729 rows[0] = (*aj++)*bs; 730 for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 731 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 732 array += bs2; 733 } 734 } 735 PetscFree(rows); 736 737 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 738 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 739 740 if (B != PETSC_NULL) { 741 *B = C; 742 } else { 743 /* This isn't really an in-place transpose */ 744 PetscFree(a->a); 745 if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 746 if (a->diag) PetscFree(a->diag); 747 if (a->ilen) PetscFree(a->ilen); 748 if (a->imax) PetscFree(a->imax); 749 if (a->solve_work) PetscFree(a->solve_work); 750 PetscFree(a); 751 PetscMemcpy(A,C,sizeof(struct _p_Mat)); 752 PetscHeaderDestroy(C); 753 } 754 return 0; 755 } 756 757 758 #undef __FUNC__ 759 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ" 760 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 761 { 762 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 763 int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 764 int m = a->m,*ip, N, *ailen = a->ilen; 765 int mbs = a->mbs, bs2 = a->bs2,rmax = 0; 766 Scalar *aa = a->a, *ap; 767 768 if (mode == MAT_FLUSH_ASSEMBLY) return 0; 769 770 if (m) rmax = ailen[0]; 771 for ( i=1; i<mbs; i++ ) { 772 /* move each row back by the amount of empty slots (fshift) before it*/ 773 fshift += imax[i-1] - ailen[i-1]; 774 rmax = PetscMax(rmax,ailen[i]); 775 if (fshift) { 776 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 777 N = ailen[i]; 778 for ( j=0; j<N; j++ ) { 779 ip[j-fshift] = ip[j]; 780 PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 781 } 782 } 783 ai[i] = ai[i-1] + ailen[i-1]; 784 } 785 if (mbs) { 786 fshift += imax[mbs-1] - ailen[mbs-1]; 787 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 788 } 789 /* reset ilen and imax for each row */ 790 for ( i=0; i<mbs; i++ ) { 791 ailen[i] = imax[i] = ai[i+1] - ai[i]; 792 } 793 a->nz = ai[mbs]; 794 795 /* diagonals may have moved, so kill the diagonal pointers */ 796 if (fshift && a->diag) { 797 PetscFree(a->diag); 798 PLogObjectMemory(A,-(m+1)*sizeof(int)); 799 a->diag = 0; 800 } 801 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 802 m,a->n,a->bs,fshift*bs2,a->nz*bs2); 803 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 804 a->reallocs); 805 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 806 a->reallocs = 0; 807 A->info.nz_unneeded = (double)fshift*bs2; 808 809 return 0; 810 } 811 812 #undef __FUNC__ 813 #define __FUNC__ "MatZeroEntries_SeqBAIJ" 814 int MatZeroEntries_SeqBAIJ(Mat A) 815 { 816 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 817 PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar)); 818 return 0; 819 } 820 821 #undef __FUNC__ 822 #define __FUNC__ "MatDestroy_SeqBAIJ" 823 int MatDestroy_SeqBAIJ(PetscObject obj) 824 { 825 Mat A = (Mat) obj; 826 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 827 828 #if defined(PETSC_LOG) 829 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 830 #endif 831 PetscFree(a->a); 832 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 833 if (a->diag) PetscFree(a->diag); 834 if (a->ilen) PetscFree(a->ilen); 835 if (a->imax) PetscFree(a->imax); 836 if (a->solve_work) PetscFree(a->solve_work); 837 if (a->mult_work) PetscFree(a->mult_work); 838 PetscFree(a); 839 PLogObjectDestroy(A); 840 PetscHeaderDestroy(A); 841 return 0; 842 } 843 844 #undef __FUNC__ 845 #define __FUNC__ "MatSetOption_SeqBAIJ" 846 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 847 { 848 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 849 if (op == MAT_ROW_ORIENTED) a->roworiented = 1; 850 else if (op == MAT_COLUMN_ORIENTED) a->roworiented = 0; 851 else if (op == MAT_COLUMNS_SORTED) a->sorted = 1; 852 else if (op == MAT_COLUMNS_UNSORTED) a->sorted = 0; 853 else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 854 else if (op == MAT_NEW_NONZERO_LOCATION_ERROR) a->nonew = -1; 855 else if (op == MAT_NEW_NONZERO_ALLOCATION_ERROR) a->nonew = -2; 856 else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 857 else if (op == MAT_ROWS_SORTED || 858 op == MAT_ROWS_UNSORTED || 859 op == MAT_SYMMETRIC || 860 op == MAT_STRUCTURALLY_SYMMETRIC || 861 op == MAT_YES_NEW_DIAGONALS || 862 op == MAT_IGNORE_OFF_PROC_ENTRIES) 863 PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 864 else if (op == MAT_NO_NEW_DIAGONALS) 865 {SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");} 866 else 867 {SETERRQ(PETSC_ERR_SUP,0,"unknown option");} 868 return 0; 869 } 870 871 872 /* -------------------------------------------------------*/ 873 /* Should check that shapes of vectors and matrices match */ 874 /* -------------------------------------------------------*/ 875 #include "pinclude/plapack.h" 876 877 #undef __FUNC__ 878 #define __FUNC__ "MatMult_SeqBAIJ_1" 879 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz) 880 { 881 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 882 register Scalar *x,*z,*v,sum; 883 int mbs=a->mbs,i,*idx,*ii,n; 884 885 VecGetArray_Fast(xx,x); 886 VecGetArray_Fast(zz,z); 887 888 idx = a->j; 889 v = a->a; 890 ii = a->i; 891 892 for ( i=0; i<mbs; i++ ) { 893 n = ii[1] - ii[0]; ii++; 894 sum = 0.0; 895 while (n--) sum += *v++ * x[*idx++]; 896 z[i] = sum; 897 } 898 VecRestoreArray_Fast(xx,x); 899 VecRestoreArray_Fast(zz,z); 900 PLogFlops(2*a->nz - a->m); 901 return 0; 902 } 903 904 #undef __FUNC__ 905 #define __FUNC__ "MatMult_SeqBAIJ_2" 906 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz) 907 { 908 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 909 register Scalar *x,*z,*v,*xb,sum1,sum2; 910 register Scalar x1,x2; 911 int mbs=a->mbs,i,*idx,*ii; 912 int j,n; 913 914 VecGetArray_Fast(xx,x); 915 VecGetArray_Fast(zz,z); 916 917 idx = a->j; 918 v = a->a; 919 ii = a->i; 920 921 for ( i=0; i<mbs; i++ ) { 922 n = ii[1] - ii[0]; ii++; 923 sum1 = 0.0; sum2 = 0.0; 924 for ( j=0; j<n; j++ ) { 925 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 926 sum1 += v[0]*x1 + v[2]*x2; 927 sum2 += v[1]*x1 + v[3]*x2; 928 v += 4; 929 } 930 z[0] = sum1; z[1] = sum2; 931 z += 2; 932 } 933 VecRestoreArray_Fast(xx,x); 934 VecRestoreArray_Fast(zz,z); 935 PLogFlops(4*a->nz - a->m); 936 return 0; 937 } 938 939 #undef __FUNC__ 940 #define __FUNC__ "MatMult_SeqBAIJ_3" 941 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz) 942 { 943 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 944 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 945 int mbs=a->mbs,i,*idx,*ii,j,n; 946 947 VecGetArray_Fast(xx,x); 948 VecGetArray_Fast(zz,z); 949 950 idx = a->j; 951 v = a->a; 952 ii = a->i; 953 954 for ( i=0; i<mbs; i++ ) { 955 n = ii[1] - ii[0]; ii++; 956 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 957 for ( j=0; j<n; j++ ) { 958 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 959 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 960 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 961 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 962 v += 9; 963 } 964 z[0] = sum1; z[1] = sum2; z[2] = sum3; 965 z += 3; 966 } 967 VecRestoreArray_Fast(xx,x); 968 VecRestoreArray_Fast(zz,z); 969 PLogFlops(18*a->nz - a->m); 970 return 0; 971 } 972 973 #undef __FUNC__ 974 #define __FUNC__ "MatMult_SeqBAIJ_4" 975 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz) 976 { 977 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 978 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4; 979 register Scalar x1,x2,x3,x4; 980 int mbs=a->mbs,i,*idx,*ii; 981 int j,n; 982 983 VecGetArray_Fast(xx,x); 984 VecGetArray_Fast(zz,z); 985 986 idx = a->j; 987 v = a->a; 988 ii = a->i; 989 990 for ( i=0; i<mbs; i++ ) { 991 n = ii[1] - ii[0]; ii++; 992 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 993 for ( j=0; j<n; j++ ) { 994 xb = x + 4*(*idx++); 995 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 996 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 997 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 998 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 999 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 1000 v += 16; 1001 } 1002 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 1003 z += 4; 1004 } 1005 VecRestoreArray_Fast(xx,x); 1006 VecRestoreArray_Fast(zz,z); 1007 PLogFlops(32*a->nz - a->m); 1008 return 0; 1009 } 1010 1011 #undef __FUNC__ 1012 #define __FUNC__ "MatMult_SeqBAIJ_5" 1013 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz) 1014 { 1015 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1016 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 1017 register Scalar x1,x2,x3,x4,x5; 1018 int mbs=a->mbs,i,*idx,*ii,j,n; 1019 1020 VecGetArray_Fast(xx,x); 1021 VecGetArray_Fast(zz,z); 1022 1023 idx = a->j; 1024 v = a->a; 1025 ii = a->i; 1026 1027 for ( i=0; i<mbs; i++ ) { 1028 n = ii[1] - ii[0]; ii++; 1029 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 1030 for ( j=0; j<n; j++ ) { 1031 xb = x + 5*(*idx++); 1032 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 1033 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1034 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1035 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1036 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1037 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 1038 v += 25; 1039 } 1040 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 1041 z += 5; 1042 } 1043 VecRestoreArray_Fast(xx,x); 1044 VecRestoreArray_Fast(zz,z); 1045 PLogFlops(50*a->nz - a->m); 1046 return 0; 1047 } 1048 1049 #undef __FUNC__ 1050 #define __FUNC__ "MatMult_SeqBAIJ_7" 1051 static int MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz) 1052 { 1053 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1054 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 1055 register Scalar x1,x2,x3,x4,x5,x6,x7; 1056 int mbs=a->mbs,i,*idx,*ii,j,n; 1057 1058 VecGetArray_Fast(xx,x); 1059 VecGetArray_Fast(zz,z); 1060 1061 idx = a->j; 1062 v = a->a; 1063 ii = a->i; 1064 1065 for ( i=0; i<mbs; i++ ) { 1066 n = ii[1] - ii[0]; ii++; 1067 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0; 1068 for ( j=0; j<n; j++ ) { 1069 xb = x + 7*(*idx++); 1070 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 1071 sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 1072 sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 1073 sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 1074 sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 1075 sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 1076 sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 1077 sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 1078 v += 49; 1079 } 1080 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 1081 z += 7; 1082 } 1083 1084 VecRestoreArray_Fast(xx,x); 1085 VecRestoreArray_Fast(zz,z); 1086 PLogFlops(98*a->nz - a->m); 1087 return 0; 1088 } 1089 1090 #undef __FUNC__ 1091 #define __FUNC__ "MatMult_SeqBAIJ_N" 1092 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz) 1093 { 1094 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1095 register Scalar *x,*z,*v,*xb; 1096 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2; 1097 int _One = 1,ncols,k; 1098 Scalar _DOne = 1.0,*work,*workt,_DZero = 0.0; 1099 1100 VecGetArray_Fast(xx,x); 1101 VecGetArray_Fast(zz,z); 1102 1103 idx = a->j; 1104 v = a->a; 1105 ii = a->i; 1106 1107 1108 if (!a->mult_work) { 1109 k = PetscMax(a->m,a->n); 1110 a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work); 1111 } 1112 work = a->mult_work; 1113 for ( i=0; i<mbs; i++ ) { 1114 n = ii[1] - ii[0]; ii++; 1115 ncols = n*bs; 1116 workt = work; 1117 for ( j=0; j<n; j++ ) { 1118 xb = x + bs*(*idx++); 1119 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1120 workt += bs; 1121 } 1122 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); 1123 v += n*bs2; 1124 z += bs; 1125 } 1126 VecRestoreArray_Fast(xx,x); 1127 VecRestoreArray_Fast(zz,z); 1128 PLogFlops(2*a->nz*bs2 - a->m); 1129 return 0; 1130 } 1131 1132 #undef __FUNC__ 1133 #define __FUNC__ "MatMultAdd_SeqBAIJ_1" 1134 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 1135 { 1136 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1137 register Scalar *x,*y,*z,*v,sum; 1138 int mbs=a->mbs,i,*idx,*ii,n; 1139 1140 VecGetArray_Fast(xx,x); 1141 VecGetArray_Fast(yy,y); 1142 VecGetArray_Fast(zz,z); 1143 1144 idx = a->j; 1145 v = a->a; 1146 ii = a->i; 1147 1148 for ( i=0; i<mbs; i++ ) { 1149 n = ii[1] - ii[0]; ii++; 1150 sum = y[i]; 1151 while (n--) sum += *v++ * x[*idx++]; 1152 z[i] = sum; 1153 } 1154 VecRestoreArray_Fast(xx,x); 1155 VecRestoreArray_Fast(yy,y); 1156 VecRestoreArray_Fast(zz,z); 1157 PLogFlops(2*a->nz); 1158 return 0; 1159 } 1160 1161 #undef __FUNC__ 1162 #define __FUNC__ "MatMultAdd_SeqBAIJ_2" 1163 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 1164 { 1165 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1166 register Scalar *x,*y,*z,*v,*xb,sum1,sum2; 1167 register Scalar x1,x2; 1168 int mbs=a->mbs,i,*idx,*ii; 1169 int j,n; 1170 1171 VecGetArray_Fast(xx,x); 1172 VecGetArray_Fast(yy,y); 1173 VecGetArray_Fast(zz,z); 1174 1175 idx = a->j; 1176 v = a->a; 1177 ii = a->i; 1178 1179 for ( i=0; i<mbs; i++ ) { 1180 n = ii[1] - ii[0]; ii++; 1181 sum1 = y[0]; sum2 = y[1]; 1182 for ( j=0; j<n; j++ ) { 1183 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 1184 sum1 += v[0]*x1 + v[2]*x2; 1185 sum2 += v[1]*x1 + v[3]*x2; 1186 v += 4; 1187 } 1188 z[0] = sum1; z[1] = sum2; 1189 z += 2; y += 2; 1190 } 1191 VecRestoreArray_Fast(xx,x); 1192 VecRestoreArray_Fast(yy,y); 1193 VecRestoreArray_Fast(zz,z); 1194 PLogFlops(4*a->nz); 1195 return 0; 1196 } 1197 1198 #undef __FUNC__ 1199 #define __FUNC__ "MatMultAdd_SeqBAIJ_3" 1200 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 1201 { 1202 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1203 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 1204 int mbs=a->mbs,i,*idx,*ii,j,n; 1205 1206 VecGetArray_Fast(xx,x); 1207 VecGetArray_Fast(yy,y); 1208 VecGetArray_Fast(zz,z); 1209 1210 idx = a->j; 1211 v = a->a; 1212 ii = a->i; 1213 1214 for ( i=0; i<mbs; i++ ) { 1215 n = ii[1] - ii[0]; ii++; 1216 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; 1217 for ( j=0; j<n; j++ ) { 1218 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1219 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 1220 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 1221 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 1222 v += 9; 1223 } 1224 z[0] = sum1; z[1] = sum2; z[2] = sum3; 1225 z += 3; y += 3; 1226 } 1227 VecRestoreArray_Fast(xx,x); 1228 VecRestoreArray_Fast(yy,y); 1229 VecRestoreArray_Fast(zz,z); 1230 PLogFlops(18*a->nz); 1231 return 0; 1232 } 1233 1234 #undef __FUNC__ 1235 #define __FUNC__ "MatMultAdd_SeqBAIJ_4" 1236 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 1237 { 1238 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1239 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4; 1240 register Scalar x1,x2,x3,x4; 1241 int mbs=a->mbs,i,*idx,*ii; 1242 int j,n; 1243 1244 VecGetArray_Fast(xx,x); 1245 VecGetArray_Fast(yy,y); 1246 VecGetArray_Fast(zz,z); 1247 1248 idx = a->j; 1249 v = a->a; 1250 ii = a->i; 1251 1252 for ( i=0; i<mbs; i++ ) { 1253 n = ii[1] - ii[0]; ii++; 1254 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; 1255 for ( j=0; j<n; j++ ) { 1256 xb = x + 4*(*idx++); 1257 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1258 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 1259 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 1260 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 1261 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 1262 v += 16; 1263 } 1264 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 1265 z += 4; y += 4; 1266 } 1267 VecRestoreArray_Fast(xx,x); 1268 VecRestoreArray_Fast(yy,y); 1269 VecRestoreArray_Fast(zz,z); 1270 PLogFlops(32*a->nz); 1271 return 0; 1272 } 1273 1274 #undef __FUNC__ 1275 #define __FUNC__ "MatMultAdd_SeqBAIJ_5" 1276 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 1277 { 1278 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1279 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5; 1280 register Scalar x1,x2,x3,x4,x5; 1281 int mbs=a->mbs,i,*idx,*ii,j,n; 1282 1283 VecGetArray_Fast(xx,x); 1284 VecGetArray_Fast(yy,y); 1285 VecGetArray_Fast(zz,z); 1286 1287 idx = a->j; 1288 v = a->a; 1289 ii = a->i; 1290 1291 for ( i=0; i<mbs; i++ ) { 1292 n = ii[1] - ii[0]; ii++; 1293 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; 1294 for ( j=0; j<n; j++ ) { 1295 xb = x + 5*(*idx++); 1296 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 1297 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1298 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1299 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1300 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1301 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 1302 v += 25; 1303 } 1304 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 1305 z += 5; y += 5; 1306 } 1307 VecRestoreArray_Fast(xx,x); 1308 VecRestoreArray_Fast(yy,y); 1309 VecRestoreArray_Fast(zz,z); 1310 PLogFlops(50*a->nz); 1311 return 0; 1312 } 1313 1314 #undef __FUNC__ 1315 #define __FUNC__ "MatMultAdd_SeqBAIJ_7" 1316 static int MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1317 { 1318 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1319 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 1320 register Scalar x1,x2,x3,x4,x5,x6,x7; 1321 int mbs=a->mbs,i,*idx,*ii,j,n; 1322 1323 VecGetArray_Fast(xx,x); 1324 VecGetArray_Fast(yy,y); 1325 VecGetArray_Fast(zz,z); 1326 1327 idx = a->j; 1328 v = a->a; 1329 ii = a->i; 1330 1331 for ( i=0; i<mbs; i++ ) { 1332 n = ii[1] - ii[0]; ii++; 1333 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6]; 1334 for ( j=0; j<n; j++ ) { 1335 xb = x + 7*(*idx++); 1336 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 1337 sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 1338 sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 1339 sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 1340 sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 1341 sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 1342 sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 1343 sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 1344 v += 49; 1345 } 1346 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 1347 z += 7; y += 7; 1348 } 1349 VecRestoreArray_Fast(xx,x); 1350 VecRestoreArray_Fast(yy,y); 1351 VecRestoreArray_Fast(zz,z); 1352 PLogFlops(98*a->nz); 1353 return 0; 1354 } 1355 1356 1357 #undef __FUNC__ 1358 #define __FUNC__ "MatMultAdd_SeqBAIJ_N" 1359 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 1360 { 1361 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1362 register Scalar *x,*z,*v,*xb; 1363 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr; 1364 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1365 1366 if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1367 1368 VecGetArray_Fast(xx,x); 1369 VecGetArray_Fast(zz,z); 1370 1371 idx = a->j; 1372 v = a->a; 1373 ii = a->i; 1374 1375 1376 if (!a->mult_work) { 1377 k = PetscMax(a->m,a->n); 1378 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work); 1379 } 1380 work = a->mult_work; 1381 for ( i=0; i<mbs; i++ ) { 1382 n = ii[1] - ii[0]; ii++; 1383 ncols = n*bs; 1384 workt = work; 1385 for ( j=0; j<n; j++ ) { 1386 xb = x + bs*(*idx++); 1387 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1388 workt += bs; 1389 } 1390 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); 1391 v += n*bs2; 1392 z += bs; 1393 } 1394 VecRestoreArray_Fast(xx,x); 1395 VecRestoreArray_Fast(zz,z); 1396 PLogFlops(2*a->nz*bs2 ); 1397 return 0; 1398 } 1399 1400 #undef __FUNC__ 1401 #define __FUNC__ "MatMultTrans_SeqBAIJ" 1402 int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz) 1403 { 1404 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1405 Scalar *xg,*zg,*zb; 1406 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1407 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1408 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1409 1410 1411 VecGetArray_Fast(xx,xg); x = xg; 1412 VecGetArray_Fast(zz,zg); z = zg; 1413 PetscMemzero(z,N*sizeof(Scalar)); 1414 1415 idx = a->j; 1416 v = a->a; 1417 ii = a->i; 1418 1419 switch (bs) { 1420 case 1: 1421 for ( i=0; i<mbs; i++ ) { 1422 n = ii[1] - ii[0]; ii++; 1423 xb = x + i; x1 = xb[0]; 1424 ib = idx + ai[i]; 1425 for ( j=0; j<n; j++ ) { 1426 rval = ib[j]; 1427 z[rval] += *v++ * x1; 1428 } 1429 } 1430 break; 1431 case 2: 1432 for ( i=0; i<mbs; i++ ) { 1433 n = ii[1] - ii[0]; ii++; 1434 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1435 ib = idx + ai[i]; 1436 for ( j=0; j<n; j++ ) { 1437 rval = ib[j]*2; 1438 z[rval++] += v[0]*x1 + v[1]*x2; 1439 z[rval++] += v[2]*x1 + v[3]*x2; 1440 v += 4; 1441 } 1442 } 1443 break; 1444 case 3: 1445 for ( i=0; i<mbs; i++ ) { 1446 n = ii[1] - ii[0]; ii++; 1447 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1448 ib = idx + ai[i]; 1449 for ( j=0; j<n; j++ ) { 1450 rval = ib[j]*3; 1451 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1452 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1453 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1454 v += 9; 1455 } 1456 } 1457 break; 1458 case 4: 1459 for ( i=0; i<mbs; i++ ) { 1460 n = ii[1] - ii[0]; ii++; 1461 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1462 ib = idx + ai[i]; 1463 for ( j=0; j<n; j++ ) { 1464 rval = ib[j]*4; 1465 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1466 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1467 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1468 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1469 v += 16; 1470 } 1471 } 1472 break; 1473 case 5: 1474 for ( i=0; i<mbs; i++ ) { 1475 n = ii[1] - ii[0]; ii++; 1476 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1477 x4 = xb[3]; x5 = xb[4]; 1478 ib = idx + ai[i]; 1479 for ( j=0; j<n; j++ ) { 1480 rval = ib[j]*5; 1481 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1482 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1483 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1484 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1485 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1486 v += 25; 1487 } 1488 } 1489 break; 1490 /* block sizes larger then 5 by 5 are handled by BLAS */ 1491 default: { 1492 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1493 if (!a->mult_work) { 1494 k = PetscMax(a->m,a->n); 1495 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1496 CHKPTRQ(a->mult_work); 1497 } 1498 work = a->mult_work; 1499 for ( i=0; i<mbs; i++ ) { 1500 n = ii[1] - ii[0]; ii++; 1501 ncols = n*bs; 1502 PetscMemzero(work,ncols*sizeof(Scalar)); 1503 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1504 v += n*bs2; 1505 x += bs; 1506 workt = work; 1507 for ( j=0; j<n; j++ ) { 1508 zb = z + bs*(*idx++); 1509 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1510 workt += bs; 1511 } 1512 } 1513 } 1514 } 1515 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1516 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1517 PLogFlops(2*a->nz*a->bs2 - a->n); 1518 return 0; 1519 } 1520 1521 #undef __FUNC__ 1522 #define __FUNC__ "MatMultTransAdd_SeqBAIJ" 1523 int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1524 { 1525 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1526 Scalar *xg,*zg,*zb; 1527 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1528 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1529 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1530 1531 1532 1533 VecGetArray_Fast(xx,xg); x = xg; 1534 VecGetArray_Fast(zz,zg); z = zg; 1535 1536 if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1537 else PetscMemzero(z,N*sizeof(Scalar)); 1538 1539 1540 idx = a->j; 1541 v = a->a; 1542 ii = a->i; 1543 1544 switch (bs) { 1545 case 1: 1546 for ( i=0; i<mbs; i++ ) { 1547 n = ii[1] - ii[0]; ii++; 1548 xb = x + i; x1 = xb[0]; 1549 ib = idx + ai[i]; 1550 for ( j=0; j<n; j++ ) { 1551 rval = ib[j]; 1552 z[rval] += *v++ * x1; 1553 } 1554 } 1555 break; 1556 case 2: 1557 for ( i=0; i<mbs; i++ ) { 1558 n = ii[1] - ii[0]; ii++; 1559 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1560 ib = idx + ai[i]; 1561 for ( j=0; j<n; j++ ) { 1562 rval = ib[j]*2; 1563 z[rval++] += v[0]*x1 + v[1]*x2; 1564 z[rval++] += v[2]*x1 + v[3]*x2; 1565 v += 4; 1566 } 1567 } 1568 break; 1569 case 3: 1570 for ( i=0; i<mbs; i++ ) { 1571 n = ii[1] - ii[0]; ii++; 1572 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1573 ib = idx + ai[i]; 1574 for ( j=0; j<n; j++ ) { 1575 rval = ib[j]*3; 1576 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1577 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1578 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1579 v += 9; 1580 } 1581 } 1582 break; 1583 case 4: 1584 for ( i=0; i<mbs; i++ ) { 1585 n = ii[1] - ii[0]; ii++; 1586 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1587 ib = idx + ai[i]; 1588 for ( j=0; j<n; j++ ) { 1589 rval = ib[j]*4; 1590 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1591 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1592 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1593 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1594 v += 16; 1595 } 1596 } 1597 break; 1598 case 5: 1599 for ( i=0; i<mbs; i++ ) { 1600 n = ii[1] - ii[0]; ii++; 1601 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1602 x4 = xb[3]; x5 = xb[4]; 1603 ib = idx + ai[i]; 1604 for ( j=0; j<n; j++ ) { 1605 rval = ib[j]*5; 1606 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1607 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1608 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1609 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1610 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1611 v += 25; 1612 } 1613 } 1614 break; 1615 /* block sizes larger then 5 by 5 are handled by BLAS */ 1616 default: { 1617 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1618 if (!a->mult_work) { 1619 k = PetscMax(a->m,a->n); 1620 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1621 CHKPTRQ(a->mult_work); 1622 } 1623 work = a->mult_work; 1624 for ( i=0; i<mbs; i++ ) { 1625 n = ii[1] - ii[0]; ii++; 1626 ncols = n*bs; 1627 PetscMemzero(work,ncols*sizeof(Scalar)); 1628 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1629 v += n*bs2; 1630 x += bs; 1631 workt = work; 1632 for ( j=0; j<n; j++ ) { 1633 zb = z + bs*(*idx++); 1634 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1635 workt += bs; 1636 } 1637 } 1638 } 1639 } 1640 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1641 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1642 PLogFlops(2*a->nz*a->bs2); 1643 return 0; 1644 } 1645 1646 #undef __FUNC__ 1647 #define __FUNC__ "MatGetInfo_SeqBAIJ" 1648 int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info) 1649 { 1650 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1651 1652 info->rows_global = (double)a->m; 1653 info->columns_global = (double)a->n; 1654 info->rows_local = (double)a->m; 1655 info->columns_local = (double)a->n; 1656 info->block_size = a->bs2; 1657 info->nz_allocated = a->maxnz; 1658 info->nz_used = a->bs2*a->nz; 1659 info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 1660 /* if (info->nz_unneeded != A->info.nz_unneeded) 1661 printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */ 1662 info->assemblies = A->num_ass; 1663 info->mallocs = a->reallocs; 1664 info->memory = A->mem; 1665 if (A->factor) { 1666 info->fill_ratio_given = A->info.fill_ratio_given; 1667 info->fill_ratio_needed = A->info.fill_ratio_needed; 1668 info->factor_mallocs = A->info.factor_mallocs; 1669 } else { 1670 info->fill_ratio_given = 0; 1671 info->fill_ratio_needed = 0; 1672 info->factor_mallocs = 0; 1673 } 1674 return 0; 1675 } 1676 1677 #undef __FUNC__ 1678 #define __FUNC__ "MatEqual_SeqBAIJ" 1679 int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 1680 { 1681 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 1682 1683 if (B->type !=MATSEQBAIJ)SETERRQ(1,0,"Matrices must be same type"); 1684 1685 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1686 if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| 1687 (a->nz != b->nz)) { 1688 *flg = PETSC_FALSE; return 0; 1689 } 1690 1691 /* if the a->i are the same */ 1692 if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 1693 *flg = PETSC_FALSE; return 0; 1694 } 1695 1696 /* if a->j are the same */ 1697 if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 1698 *flg = PETSC_FALSE; return 0; 1699 } 1700 1701 /* if a->a are the same */ 1702 if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 1703 *flg = PETSC_FALSE; return 0; 1704 } 1705 *flg = PETSC_TRUE; 1706 return 0; 1707 1708 } 1709 1710 #undef __FUNC__ 1711 #define __FUNC__ "MatGetDiagonal_SeqBAIJ" 1712 int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 1713 { 1714 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1715 int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 1716 Scalar *x, zero = 0.0,*aa,*aa_j; 1717 1718 if (A->factor) SETERRQ(1,0,"Not for factored matrix"); 1719 bs = a->bs; 1720 aa = a->a; 1721 ai = a->i; 1722 aj = a->j; 1723 ambs = a->mbs; 1724 bs2 = a->bs2; 1725 1726 VecSet(&zero,v); 1727 VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n); 1728 if (n != a->m) SETERRQ(1,0,"Nonconforming matrix and vector"); 1729 for ( i=0; i<ambs; i++ ) { 1730 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1731 if (aj[j] == i) { 1732 row = i*bs; 1733 aa_j = aa+j*bs2; 1734 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 1735 break; 1736 } 1737 } 1738 } 1739 return 0; 1740 } 1741 1742 #undef __FUNC__ 1743 #define __FUNC__ "MatDiagonalScale_SeqBAIJ" 1744 int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 1745 { 1746 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1747 Scalar *l,*r,x,*v,*aa,*li,*ri; 1748 int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 1749 1750 ai = a->i; 1751 aj = a->j; 1752 aa = a->a; 1753 m = a->m; 1754 n = a->n; 1755 bs = a->bs; 1756 mbs = a->mbs; 1757 bs2 = a->bs2; 1758 if (ll) { 1759 VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm); 1760 if (lm != m) SETERRQ(1,0,"Left scaling vector wrong length"); 1761 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1762 M = ai[i+1] - ai[i]; 1763 li = l + i*bs; 1764 v = aa + bs2*ai[i]; 1765 for ( j=0; j<M; j++ ) { /* for each block */ 1766 for ( k=0; k<bs2; k++ ) { 1767 (*v++) *= li[k%bs]; 1768 } 1769 } 1770 } 1771 } 1772 1773 if (rr) { 1774 VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn); 1775 if (rn != n) SETERRQ(1,0,"Right scaling vector wrong length"); 1776 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1777 M = ai[i+1] - ai[i]; 1778 v = aa + bs2*ai[i]; 1779 for ( j=0; j<M; j++ ) { /* for each block */ 1780 ri = r + bs*aj[ai[i]+j]; 1781 for ( k=0; k<bs; k++ ) { 1782 x = ri[k]; 1783 for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 1784 } 1785 } 1786 } 1787 } 1788 return 0; 1789 } 1790 1791 1792 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 1793 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 1794 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1795 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*); 1796 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**); 1797 1798 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 1799 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 1800 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 1801 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 1802 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 1803 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 1804 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec); 1805 1806 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 1807 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 1808 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 1809 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 1810 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 1811 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 1812 1813 #undef __FUNC__ 1814 #define __FUNC__ "MatNorm_SeqBAIJ" 1815 int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 1816 { 1817 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1818 Scalar *v = a->a; 1819 double sum = 0.0; 1820 int i,nz=a->nz,bs2=a->bs2; 1821 1822 if (type == NORM_FROBENIUS) { 1823 for (i=0; i< bs2*nz; i++ ) { 1824 #if defined(PETSC_COMPLEX) 1825 sum += real(conj(*v)*(*v)); v++; 1826 #else 1827 sum += (*v)*(*v); v++; 1828 #endif 1829 } 1830 *norm = sqrt(sum); 1831 } 1832 else { 1833 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 1834 } 1835 return 0; 1836 } 1837 1838 extern int MatSolve_SeqBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 1839 /* 1840 note: This can only work for identity for row and col. It would 1841 be good to check this and otherwise generate an error. 1842 */ 1843 #undef __FUNC__ 1844 #define __FUNC__ "MatILUFactor_SeqBAIJ" 1845 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 1846 { 1847 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1848 Mat outA; 1849 int ierr; 1850 1851 if (fill != 0) SETERRQ(1,0,"Only fill=0 supported"); 1852 1853 outA = inA; 1854 inA->factor = FACTOR_LU; 1855 a->row = row; 1856 a->col = col; 1857 1858 if (!a->solve_work) { 1859 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1860 PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar)); 1861 } 1862 1863 if (!a->diag) { 1864 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 1865 } 1866 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 1867 1868 /* 1869 Blocksize 4 has a special faster solver for ILU(0) factorization 1870 with natural ordering 1871 */ 1872 if (a->bs == 4) { 1873 inA->ops.solve = MatSolve_SeqBAIJ_4_NaturalOrdering; 1874 } 1875 1876 return 0; 1877 } 1878 1879 #undef __FUNC__ 1880 #define __FUNC__ "MatScale_SeqBAIJ" 1881 int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 1882 { 1883 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1884 int one = 1, totalnz = a->bs2*a->nz; 1885 BLscal_( &totalnz, alpha, a->a, &one ); 1886 PLogFlops(totalnz); 1887 return 0; 1888 } 1889 1890 #undef __FUNC__ 1891 #define __FUNC__ "MatGetValues_SeqBAIJ" 1892 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 1893 { 1894 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1895 int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 1896 int *ai = a->i, *ailen = a->ilen; 1897 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 1898 Scalar *ap, *aa = a->a, zero = 0.0; 1899 1900 for ( k=0; k<m; k++ ) { /* loop over rows */ 1901 row = im[k]; brow = row/bs; 1902 if (row < 0) SETERRQ(1,0,"Negative row"); 1903 if (row >= a->m) SETERRQ(1,0,"Row too large"); 1904 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1905 nrow = ailen[brow]; 1906 for ( l=0; l<n; l++ ) { /* loop over columns */ 1907 if (in[l] < 0) SETERRQ(1,0,"Negative column"); 1908 if (in[l] >= a->n) SETERRQ(1,0,"Column too large"); 1909 col = in[l] ; 1910 bcol = col/bs; 1911 cidx = col%bs; 1912 ridx = row%bs; 1913 high = nrow; 1914 low = 0; /* assume unsorted */ 1915 while (high-low > 5) { 1916 t = (low+high)/2; 1917 if (rp[t] > bcol) high = t; 1918 else low = t; 1919 } 1920 for ( i=low; i<high; i++ ) { 1921 if (rp[i] > bcol) break; 1922 if (rp[i] == bcol) { 1923 *v++ = ap[bs2*i+bs*cidx+ridx]; 1924 goto finished; 1925 } 1926 } 1927 *v++ = zero; 1928 finished:; 1929 } 1930 } 1931 return 0; 1932 } 1933 1934 #undef __FUNC__ 1935 #define __FUNC__ "MatGetBlockSize_SeqBAIJ" 1936 int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs) 1937 { 1938 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data; 1939 *bs = baij->bs; 1940 return 0; 1941 } 1942 1943 /* idx should be of length atlease bs */ 1944 #undef __FUNC__ 1945 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Block" 1946 static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg) 1947 { 1948 int i,row; 1949 row = idx[0]; 1950 if (row%bs!=0) { *flg = PETSC_FALSE; return 0; } 1951 1952 for ( i=1; i<bs; i++ ) { 1953 if (row+i != idx[i]) { *flg = PETSC_FALSE; return 0; } 1954 } 1955 *flg = PETSC_TRUE; 1956 return 0; 1957 } 1958 1959 #undef __FUNC__ 1960 #define __FUNC__ "MatZeroRows_SeqBAIJ" 1961 int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag) 1962 { 1963 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1964 IS is_local; 1965 int ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2; 1966 PetscTruth flg; 1967 Scalar zero = 0.0,*aa; 1968 1969 /* Make a copy of the IS and sort it */ 1970 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 1971 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1972 ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr); 1973 ierr = ISSort(is_local); CHKERRQ(ierr); 1974 ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr); 1975 1976 i = 0; 1977 while (i < is_n) { 1978 if (rows[i]<0 || rows[i]>m) SETERRQ(1,0,"row out of range"); 1979 flg = PETSC_FALSE; 1980 if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); } 1981 if (flg) { /* There exists a block of rows to be Zerowed */ 1982 baij->ilen[rows[i]/bs] = 0; 1983 i += bs; 1984 } else { /* Zero out only the requested row */ 1985 count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs; 1986 aa = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs); 1987 for ( j=0; j<count; j++ ) { 1988 aa[0] = zero; 1989 aa+=bs; 1990 } 1991 i++; 1992 } 1993 } 1994 if (diag) { 1995 for ( j=0; j<is_n; j++ ) { 1996 ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1997 } 1998 } 1999 ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr); 2000 ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr); 2001 ierr = ISDestroy(is_local); CHKERRQ(ierr); 2002 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2003 2004 return 0; 2005 } 2006 2007 #undef __FUNC__ 2008 #define __FUNC__ "MatPrintHelp_SeqBAIJ" 2009 int MatPrintHelp_SeqBAIJ(Mat A) 2010 { 2011 static int called = 0; 2012 MPI_Comm comm = A->comm; 2013 2014 if (called) return 0; else called = 1; 2015 PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n"); 2016 PetscPrintf(comm," -mat_block_size <block_size>\n"); 2017 return 0; 2018 } 2019 2020 /* -------------------------------------------------------------------*/ 2021 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 2022 MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 2023 MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N, 2024 MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ, 2025 MatSolve_SeqBAIJ_N,0, 2026 0,0, 2027 MatLUFactor_SeqBAIJ,0, 2028 0, 2029 MatTranspose_SeqBAIJ, 2030 MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 2031 MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 2032 0,MatAssemblyEnd_SeqBAIJ, 2033 0, 2034 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ, 2035 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 2036 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 2037 MatILUFactorSymbolic_SeqBAIJ,0, 2038 0,0, 2039 MatConvertSameType_SeqBAIJ,0,0, 2040 MatILUFactor_SeqBAIJ,0,0, 2041 MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ, 2042 MatGetValues_SeqBAIJ,0, 2043 MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 2044 0,0,0,MatGetBlockSize_SeqBAIJ, 2045 MatGetRowIJ_SeqBAIJ, 2046 MatRestoreRowIJ_SeqBAIJ, 2047 0,0,0,0,0,0, 2048 MatSetValuesBlocked_SeqBAIJ}; 2049 2050 #undef __FUNC__ 2051 #define __FUNC__ "MatCreateSeqBAIJ" 2052 /*@C 2053 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2054 compressed row) format. For good matrix assembly performance the 2055 user should preallocate the matrix storage by setting the parameter nz 2056 (or the array nzz). By setting these parameters accurately, performance 2057 during matrix assembly can be increased by more than a factor of 50. 2058 2059 Input Parameters: 2060 . comm - MPI communicator, set to PETSC_COMM_SELF 2061 . bs - size of block 2062 . m - number of rows 2063 . n - number of columns 2064 . nz - number of block nonzeros per block row (same for all rows) 2065 . nzz - array containing the number of block nonzeros in the various block rows 2066 (possibly different for each block row) or PETSC_NULL 2067 2068 Output Parameter: 2069 . A - the matrix 2070 2071 Options Database Keys: 2072 $ -mat_no_unroll - uses code that does not unroll the loops in the 2073 $ block calculations (much slower) 2074 $ -mat_block_size - size of the blocks to use 2075 2076 Notes: 2077 The block AIJ format is fully compatible with standard Fortran 77 2078 storage. That is, the stored row and column indices can begin at 2079 either one (as in Fortran) or zero. See the users' manual for details. 2080 2081 Specify the preallocated storage with either nz or nnz (not both). 2082 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2083 allocation. For additional details, see the users manual chapter on 2084 matrices. 2085 2086 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2087 @*/ 2088 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 2089 { 2090 Mat B; 2091 Mat_SeqBAIJ *b; 2092 int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size; 2093 2094 MPI_Comm_size(comm,&size); 2095 if (size > 1) SETERRQ(1,0,"Comm must be of size 1"); 2096 2097 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr); 2098 2099 if (mbs*bs!=m || nbs*bs!=n) 2100 SETERRQ(1,0,"Number rows, cols must be divisible by blocksize"); 2101 2102 *A = 0; 2103 PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATSEQBAIJ,comm,MatDestroy,MatView); 2104 PLogObjectCreate(B); 2105 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 2106 PetscMemzero(b,sizeof(Mat_SeqBAIJ)); 2107 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 2108 ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr); 2109 if (!flg) { 2110 switch (bs) { 2111 case 1: 2112 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2113 B->ops.solve = MatSolve_SeqBAIJ_1; 2114 B->ops.mult = MatMult_SeqBAIJ_1; 2115 B->ops.multadd = MatMultAdd_SeqBAIJ_1; 2116 break; 2117 case 2: 2118 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2119 B->ops.solve = MatSolve_SeqBAIJ_2; 2120 B->ops.mult = MatMult_SeqBAIJ_2; 2121 B->ops.multadd = MatMultAdd_SeqBAIJ_2; 2122 break; 2123 case 3: 2124 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2125 B->ops.solve = MatSolve_SeqBAIJ_3; 2126 B->ops.mult = MatMult_SeqBAIJ_3; 2127 B->ops.multadd = MatMultAdd_SeqBAIJ_3; 2128 break; 2129 case 4: 2130 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2131 B->ops.solve = MatSolve_SeqBAIJ_4; 2132 B->ops.mult = MatMult_SeqBAIJ_4; 2133 B->ops.multadd = MatMultAdd_SeqBAIJ_4; 2134 break; 2135 case 5: 2136 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2137 B->ops.solve = MatSolve_SeqBAIJ_5; 2138 B->ops.mult = MatMult_SeqBAIJ_5; 2139 B->ops.multadd = MatMultAdd_SeqBAIJ_5; 2140 break; 2141 case 7: 2142 B->ops.mult = MatMult_SeqBAIJ_7; 2143 B->ops.solve = MatSolve_SeqBAIJ_7; 2144 B->ops.multadd = MatMultAdd_SeqBAIJ_7; 2145 break; 2146 } 2147 } 2148 B->destroy = MatDestroy_SeqBAIJ; 2149 B->view = MatView_SeqBAIJ; 2150 B->factor = 0; 2151 B->lupivotthreshold = 1.0; 2152 B->mapping = 0; 2153 b->row = 0; 2154 b->col = 0; 2155 b->reallocs = 0; 2156 2157 b->m = m; B->m = m; B->M = m; 2158 b->n = n; B->n = n; B->N = n; 2159 b->mbs = mbs; 2160 b->nbs = nbs; 2161 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 2162 if (nnz == PETSC_NULL) { 2163 if (nz == PETSC_DEFAULT) nz = 5; 2164 else if (nz <= 0) nz = 1; 2165 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 2166 nz = nz*mbs; 2167 } 2168 else { 2169 nz = 0; 2170 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 2171 } 2172 2173 /* allocate the matrix space */ 2174 len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 2175 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 2176 PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 2177 b->j = (int *) (b->a + nz*bs2); 2178 PetscMemzero(b->j,nz*sizeof(int)); 2179 b->i = b->j + nz; 2180 b->singlemalloc = 1; 2181 2182 b->i[0] = 0; 2183 for (i=1; i<mbs+1; i++) { 2184 b->i[i] = b->i[i-1] + b->imax[i-1]; 2185 } 2186 2187 /* b->ilen will count nonzeros in each block row so far. */ 2188 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 2189 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2190 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 2191 2192 b->bs = bs; 2193 b->bs2 = bs2; 2194 b->mbs = mbs; 2195 b->nz = 0; 2196 b->maxnz = nz*bs2; 2197 b->sorted = 0; 2198 b->roworiented = 1; 2199 b->nonew = 0; 2200 b->diag = 0; 2201 b->solve_work = 0; 2202 b->mult_work = 0; 2203 b->spptr = 0; 2204 B->info.nz_unneeded = (double)b->maxnz; 2205 2206 *A = B; 2207 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 2208 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 2209 return 0; 2210 } 2211 2212 #undef __FUNC__ 2213 #define __FUNC__ "MatConvertSameType_SeqBAIJ" 2214 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 2215 { 2216 Mat C; 2217 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 2218 int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 2219 2220 if (a->i[mbs] != nz) SETERRQ(1,0,"Corrupt matrix"); 2221 2222 *B = 0; 2223 PetscHeaderCreate(C,_p_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm,MatDestroy,MatView); 2224 PLogObjectCreate(C); 2225 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 2226 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 2227 C->destroy = MatDestroy_SeqBAIJ; 2228 C->view = MatView_SeqBAIJ; 2229 C->factor = A->factor; 2230 c->row = 0; 2231 c->col = 0; 2232 C->assembled = PETSC_TRUE; 2233 2234 c->m = C->m = a->m; 2235 c->n = C->n = a->n; 2236 C->M = a->m; 2237 C->N = a->n; 2238 2239 c->bs = a->bs; 2240 c->bs2 = a->bs2; 2241 c->mbs = a->mbs; 2242 c->nbs = a->nbs; 2243 2244 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 2245 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 2246 for ( i=0; i<mbs; i++ ) { 2247 c->imax[i] = a->imax[i]; 2248 c->ilen[i] = a->ilen[i]; 2249 } 2250 2251 /* allocate the matrix space */ 2252 c->singlemalloc = 1; 2253 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 2254 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 2255 c->j = (int *) (c->a + nz*bs2); 2256 c->i = c->j + nz; 2257 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 2258 if (mbs > 0) { 2259 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 2260 if (cpvalues == COPY_VALUES) { 2261 PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 2262 } 2263 } 2264 2265 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2266 c->sorted = a->sorted; 2267 c->roworiented = a->roworiented; 2268 c->nonew = a->nonew; 2269 2270 if (a->diag) { 2271 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 2272 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 2273 for ( i=0; i<mbs; i++ ) { 2274 c->diag[i] = a->diag[i]; 2275 } 2276 } 2277 else c->diag = 0; 2278 c->nz = a->nz; 2279 c->maxnz = a->maxnz; 2280 c->solve_work = 0; 2281 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 2282 c->mult_work = 0; 2283 *B = C; 2284 return 0; 2285 } 2286 2287 #undef __FUNC__ 2288 #define __FUNC__ "MatLoad_SeqBAIJ" 2289 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 2290 { 2291 Mat_SeqBAIJ *a; 2292 Mat B; 2293 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 2294 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2295 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 2296 int *masked, nmask,tmp,bs2,ishift; 2297 Scalar *aa; 2298 MPI_Comm comm = ((PetscObject) viewer)->comm; 2299 2300 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2301 bs2 = bs*bs; 2302 2303 MPI_Comm_size(comm,&size); 2304 if (size > 1) SETERRQ(1,0,"view must have one processor"); 2305 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2306 ierr = PetscBinaryRead(fd,header,4,PETSC_INT); CHKERRQ(ierr); 2307 if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not Mat object"); 2308 M = header[1]; N = header[2]; nz = header[3]; 2309 2310 if (M != N) SETERRQ(1,0,"Can only do square matrices"); 2311 2312 /* 2313 This code adds extra rows to make sure the number of rows is 2314 divisible by the blocksize 2315 */ 2316 mbs = M/bs; 2317 extra_rows = bs - M + bs*(mbs); 2318 if (extra_rows == bs) extra_rows = 0; 2319 else mbs++; 2320 if (extra_rows) { 2321 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 2322 } 2323 2324 /* read in row lengths */ 2325 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 2326 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 2327 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 2328 2329 /* read in column indices */ 2330 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 2331 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT); CHKERRQ(ierr); 2332 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 2333 2334 /* loop over row lengths determining block row lengths */ 2335 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 2336 PetscMemzero(browlengths,mbs*sizeof(int)); 2337 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 2338 PetscMemzero(mask,mbs*sizeof(int)); 2339 masked = mask + mbs; 2340 rowcount = 0; nzcount = 0; 2341 for ( i=0; i<mbs; i++ ) { 2342 nmask = 0; 2343 for ( j=0; j<bs; j++ ) { 2344 kmax = rowlengths[rowcount]; 2345 for ( k=0; k<kmax; k++ ) { 2346 tmp = jj[nzcount++]/bs; 2347 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2348 } 2349 rowcount++; 2350 } 2351 browlengths[i] += nmask; 2352 /* zero out the mask elements we set */ 2353 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2354 } 2355 2356 /* create our matrix */ 2357 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 2358 B = *A; 2359 a = (Mat_SeqBAIJ *) B->data; 2360 2361 /* set matrix "i" values */ 2362 a->i[0] = 0; 2363 for ( i=1; i<= mbs; i++ ) { 2364 a->i[i] = a->i[i-1] + browlengths[i-1]; 2365 a->ilen[i-1] = browlengths[i-1]; 2366 } 2367 a->nz = 0; 2368 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 2369 2370 /* read in nonzero values */ 2371 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 2372 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR); CHKERRQ(ierr); 2373 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 2374 2375 /* set "a" and "j" values into matrix */ 2376 nzcount = 0; jcount = 0; 2377 for ( i=0; i<mbs; i++ ) { 2378 nzcountb = nzcount; 2379 nmask = 0; 2380 for ( j=0; j<bs; j++ ) { 2381 kmax = rowlengths[i*bs+j]; 2382 for ( k=0; k<kmax; k++ ) { 2383 tmp = jj[nzcount++]/bs; 2384 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2385 } 2386 rowcount++; 2387 } 2388 /* sort the masked values */ 2389 PetscSortInt(nmask,masked); 2390 2391 /* set "j" values into matrix */ 2392 maskcount = 1; 2393 for ( j=0; j<nmask; j++ ) { 2394 a->j[jcount++] = masked[j]; 2395 mask[masked[j]] = maskcount++; 2396 } 2397 /* set "a" values into matrix */ 2398 ishift = bs2*a->i[i]; 2399 for ( j=0; j<bs; j++ ) { 2400 kmax = rowlengths[i*bs+j]; 2401 for ( k=0; k<kmax; k++ ) { 2402 tmp = jj[nzcountb]/bs ; 2403 block = mask[tmp] - 1; 2404 point = jj[nzcountb] - bs*tmp; 2405 idx = ishift + bs2*block + j + bs*point; 2406 a->a[idx] = aa[nzcountb++]; 2407 } 2408 } 2409 /* zero out the mask elements we set */ 2410 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2411 } 2412 if (jcount != a->nz) SETERRQ(1,0,"Error bad binary matrix"); 2413 2414 PetscFree(rowlengths); 2415 PetscFree(browlengths); 2416 PetscFree(aa); 2417 PetscFree(jj); 2418 PetscFree(mask); 2419 2420 B->assembled = PETSC_TRUE; 2421 2422 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info",&flg); CHKERRQ(ierr); 2423 if (flg) { 2424 Viewer tviewer; 2425 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2426 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO,0);CHKERRQ(ierr); 2427 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2428 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2429 } 2430 ierr = OptionsHasName(PETSC_NULL,"-mat_view_info_detailed",&flg);CHKERRQ(ierr); 2431 if (flg) { 2432 Viewer tviewer; 2433 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2434 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_INFO_LONG,0);CHKERRQ(ierr); 2435 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2436 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2437 } 2438 ierr = OptionsHasName(PETSC_NULL,"-mat_view",&flg); CHKERRQ(ierr); 2439 if (flg) { 2440 Viewer tviewer; 2441 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2442 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2443 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2444 } 2445 ierr = OptionsHasName(PETSC_NULL,"-mat_view_matlab",&flg); CHKERRQ(ierr); 2446 if (flg) { 2447 Viewer tviewer; 2448 ierr = ViewerFileOpenASCII(B->comm,"stdout",&tviewer);CHKERRQ(ierr); 2449 ierr = ViewerSetFormat(tviewer,VIEWER_FORMAT_ASCII_MATLAB,"M");CHKERRQ(ierr); 2450 ierr = MatView(B,tviewer); CHKERRQ(ierr); 2451 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2452 } 2453 ierr = OptionsHasName(PETSC_NULL,"-mat_view_draw",&flg); CHKERRQ(ierr); 2454 if (flg) { 2455 Viewer tviewer; 2456 ierr = ViewerDrawOpenX(B->comm,0,0,0,0,300,300,&tviewer); CHKERRQ(ierr); 2457 ierr = MatView(B,(Viewer)tviewer); CHKERRQ(ierr); 2458 ierr = ViewerFlush(tviewer); CHKERRQ(ierr); 2459 ierr = ViewerDestroy(tviewer); CHKERRQ(ierr); 2460 } 2461 return 0; 2462 } 2463 2464 2465 2466