1 2 #ifdef PETSC_RCS_HEADER 3 static char vcid[] = "$Id: baij.c,v 1.113 1997/10/01 22:39:40 bsmith Exp bsmith $"; 4 #endif 5 6 /* 7 Defines the basic matrix operations for the BAIJ (compressed row) 8 matrix storage format. 9 */ 10 11 #include "pinclude/pviewer.h" 12 #include "sys.h" 13 #include "src/mat/impls/baij/seq/baij.h" 14 #include "src/vec/vecimpl.h" 15 #include "src/inline/spops.h" 16 #include "petsc.h" 17 18 19 /* 20 Adds diagonal pointers to sparse matrix structure. 21 */ 22 23 #undef __FUNC__ 24 #define __FUNC__ "MatMarkDiag_SeqBAIJ" 25 int MatMarkDiag_SeqBAIJ(Mat A) 26 { 27 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 28 int i,j, *diag, m = a->mbs; 29 30 PetscFunctionBegin; 31 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 32 PLogObjectMemory(A,(m+1)*sizeof(int)); 33 for ( i=0; i<m; i++ ) { 34 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 35 if (a->j[j] == i) { 36 diag[i] = j; 37 break; 38 } 39 } 40 } 41 a->diag = diag; 42 PetscFunctionReturn(0); 43 } 44 45 46 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 47 48 #undef __FUNC__ 49 #define __FUNC__ "MatGetRowIJ_SeqBAIJ" 50 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 51 PetscTruth *done) 52 { 53 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 54 int ierr, n = a->mbs,i; 55 56 PetscFunctionBegin; 57 *nn = n; 58 if (!ia) PetscFunctionReturn(0); 59 if (symmetric) { 60 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 61 } else if (oshift == 1) { 62 /* temporarily add 1 to i and j indices */ 63 int nz = a->i[n] + 1; 64 for ( i=0; i<nz; i++ ) a->j[i]++; 65 for ( i=0; i<n+1; i++ ) a->i[i]++; 66 *ia = a->i; *ja = a->j; 67 } else { 68 *ia = a->i; *ja = a->j; 69 } 70 71 PetscFunctionReturn(0); 72 } 73 74 #undef __FUNC__ 75 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ" 76 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 77 PetscTruth *done) 78 { 79 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 80 int i,n = a->mbs; 81 82 PetscFunctionBegin; 83 if (!ia) PetscFunctionReturn(0); 84 if (symmetric) { 85 PetscFree(*ia); 86 PetscFree(*ja); 87 } else if (oshift == 1) { 88 int nz = a->i[n]; 89 for ( i=0; i<nz; i++ ) a->j[i]--; 90 for ( i=0; i<n+1; i++ ) a->i[i]--; 91 } 92 PetscFunctionReturn(0); 93 } 94 95 96 #undef __FUNC__ 97 #define __FUNC__ "MatView_SeqBAIJ_Binary" 98 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 99 { 100 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 101 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 102 Scalar *aa; 103 104 PetscFunctionBegin; 105 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 106 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 107 col_lens[0] = MAT_COOKIE; 108 109 col_lens[1] = a->m; 110 col_lens[2] = a->n; 111 col_lens[3] = a->nz*bs2; 112 113 /* store lengths of each row and write (including header) to file */ 114 count = 0; 115 for ( i=0; i<a->mbs; i++ ) { 116 for ( j=0; j<bs; j++ ) { 117 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 118 } 119 } 120 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,PETSC_INT,1); CHKERRQ(ierr); 121 PetscFree(col_lens); 122 123 /* store column indices (zero start index) */ 124 jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) ); CHKPTRQ(jj); 125 count = 0; 126 for ( i=0; i<a->mbs; i++ ) { 127 for ( j=0; j<bs; j++ ) { 128 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 129 for ( l=0; l<bs; l++ ) { 130 jj[count++] = bs*a->j[k] + l; 131 } 132 } 133 } 134 } 135 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0); CHKERRQ(ierr); 136 PetscFree(jj); 137 138 /* store nonzero values */ 139 aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar)); CHKPTRQ(aa); 140 count = 0; 141 for ( i=0; i<a->mbs; i++ ) { 142 for ( j=0; j<bs; j++ ) { 143 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 144 for ( l=0; l<bs; l++ ) { 145 aa[count++] = a->a[bs2*k + l*bs + j]; 146 } 147 } 148 } 149 } 150 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0); CHKERRQ(ierr); 151 PetscFree(aa); 152 PetscFunctionReturn(0); 153 } 154 155 #undef __FUNC__ 156 #define __FUNC__ "MatView_SeqBAIJ_ASCII" 157 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 158 { 159 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 160 int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 161 FILE *fd; 162 char *outputname; 163 164 PetscFunctionBegin; 165 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 166 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 167 ierr = ViewerGetFormat(viewer,&format); 168 if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) { 169 fprintf(fd," block size is %d\n",bs); 170 } 171 else if (format == VIEWER_FORMAT_ASCII_MATLAB) { 172 SETERRQ(1,0,"Matlab format not supported"); 173 } 174 else if (format == VIEWER_FORMAT_ASCII_COMMON) { 175 for ( i=0; i<a->mbs; i++ ) { 176 for ( j=0; j<bs; j++ ) { 177 fprintf(fd,"row %d:",i*bs+j); 178 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 179 for ( l=0; l<bs; l++ ) { 180 #if defined(USE_PETSC_COMPLEX) 181 if (imag(a->a[bs2*k + l*bs + j]) > 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 182 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 183 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 184 else if (imag(a->a[bs2*k + l*bs + j]) < 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 185 fprintf(fd," %d %g - %g i",bs*a->j[k]+l, 186 real(a->a[bs2*k + l*bs + j]),-imag(a->a[bs2*k + l*bs + j])); 187 else if (real(a->a[bs2*k + l*bs + j]) != 0.0) 188 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 189 #else 190 if (a->a[bs2*k + l*bs + j] != 0.0) 191 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 192 #endif 193 } 194 } 195 fprintf(fd,"\n"); 196 } 197 } 198 } 199 else { 200 for ( i=0; i<a->mbs; i++ ) { 201 for ( j=0; j<bs; j++ ) { 202 fprintf(fd,"row %d:",i*bs+j); 203 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 204 for ( l=0; l<bs; l++ ) { 205 #if defined(USE_PETSC_COMPLEX) 206 if (imag(a->a[bs2*k + l*bs + j]) > 0.0) { 207 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 208 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 209 } 210 else if (imag(a->a[bs2*k + l*bs + j]) < 0.0) { 211 fprintf(fd," %d %g - %g i",bs*a->j[k]+l, 212 real(a->a[bs2*k + l*bs + j]),-imag(a->a[bs2*k + l*bs + j])); 213 } 214 else { 215 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 216 } 217 #else 218 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 219 #endif 220 } 221 } 222 fprintf(fd,"\n"); 223 } 224 } 225 } 226 fflush(fd); 227 PetscFunctionReturn(0); 228 } 229 230 #undef __FUNC__ 231 #define __FUNC__ "MatView_SeqBAIJ_Draw" 232 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer) 233 { 234 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 235 int row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2; 236 double xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r; 237 Scalar *aa; 238 Draw draw; 239 DrawButton button; 240 PetscTruth isnull; 241 242 PetscFunctionBegin; 243 ierr = ViewerDrawGetDraw(viewer,&draw);CHKERRQ(ierr); 244 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 245 246 xr = a->n; yr = a->m; h = yr/10.0; w = xr/10.0; 247 xr += w; yr += h; xl = -w; yl = -h; 248 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 249 /* loop over matrix elements drawing boxes */ 250 color = DRAW_BLUE; 251 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 252 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 253 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 254 x_l = a->j[j]*bs; x_r = x_l + 1.0; 255 aa = a->a + j*bs2; 256 for ( k=0; k<bs; k++ ) { 257 for ( l=0; l<bs; l++ ) { 258 if (PetscReal(*aa++) >= 0.) continue; 259 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 260 } 261 } 262 } 263 } 264 color = DRAW_CYAN; 265 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 266 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 267 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 268 x_l = a->j[j]*bs; x_r = x_l + 1.0; 269 aa = a->a + j*bs2; 270 for ( k=0; k<bs; k++ ) { 271 for ( l=0; l<bs; l++ ) { 272 if (PetscReal(*aa++) != 0.) continue; 273 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 274 } 275 } 276 } 277 } 278 279 color = DRAW_RED; 280 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 281 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 282 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 283 x_l = a->j[j]*bs; x_r = x_l + 1.0; 284 aa = a->a + j*bs2; 285 for ( k=0; k<bs; k++ ) { 286 for ( l=0; l<bs; l++ ) { 287 if (PetscReal(*aa++) <= 0.) continue; 288 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 289 } 290 } 291 } 292 } 293 294 DrawFlush(draw); 295 DrawGetPause(draw,&pause); 296 if (pause >= 0) { PetscSleep(pause); PetscFunctionReturn(0);} 297 298 /* allow the matrix to zoom or shrink */ 299 ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 300 while (button != BUTTON_RIGHT) { 301 DrawClear(draw); 302 if (button == BUTTON_LEFT) scale = .5; 303 else if (button == BUTTON_CENTER) scale = 2.; 304 xl = scale*(xl + w - xc) + xc - w*scale; 305 xr = scale*(xr - w - xc) + xc + w*scale; 306 yl = scale*(yl + h - yc) + yc - h*scale; 307 yr = scale*(yr - h - yc) + yc + h*scale; 308 w *= scale; h *= scale; 309 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 310 color = DRAW_BLUE; 311 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 312 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 313 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 314 x_l = a->j[j]*bs; x_r = x_l + 1.0; 315 aa = a->a + j*bs2; 316 for ( k=0; k<bs; k++ ) { 317 for ( l=0; l<bs; l++ ) { 318 if (PetscReal(*aa++) >= 0.) continue; 319 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 320 } 321 } 322 } 323 } 324 color = DRAW_CYAN; 325 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 326 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 327 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 328 x_l = a->j[j]*bs; x_r = x_l + 1.0; 329 aa = a->a + j*bs2; 330 for ( k=0; k<bs; k++ ) { 331 for ( l=0; l<bs; l++ ) { 332 if (PetscReal(*aa++) != 0.) continue; 333 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 334 } 335 } 336 } 337 } 338 339 color = DRAW_RED; 340 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 341 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 342 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 343 x_l = a->j[j]*bs; x_r = x_l + 1.0; 344 aa = a->a + j*bs2; 345 for ( k=0; k<bs; k++ ) { 346 for ( l=0; l<bs; l++ ) { 347 if (PetscReal(*aa++) <= 0.) continue; 348 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 349 } 350 } 351 } 352 } 353 ierr = DrawGetMouseButton(draw,&button,&xc,&yc,0,0); 354 } 355 PetscFunctionReturn(0); 356 } 357 358 #undef __FUNC__ 359 #define __FUNC__ "MatView_SeqBAIJ" 360 int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 361 { 362 Mat A = (Mat) obj; 363 ViewerType vtype; 364 int ierr; 365 366 PetscFunctionBegin; 367 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 368 if (vtype == MATLAB_VIEWER) { 369 SETERRQ(1,0,"Matlab viewer not supported"); 370 } else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 371 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 372 } else if (vtype == BINARY_FILE_VIEWER) { 373 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 374 } else if (vtype == DRAW_VIEWER) { 375 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 376 } 377 PetscFunctionReturn(0); 378 } 379 380 #define CHUNKSIZE 10 381 382 #undef __FUNC__ 383 #define __FUNC__ "MatSetValues_SeqBAIJ" 384 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 385 { 386 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 387 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 388 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 389 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 390 int ridx,cidx,bs2=a->bs2; 391 Scalar *ap,value,*aa=a->a,*bap; 392 393 PetscFunctionBegin; 394 for ( k=0; k<m; k++ ) { /* loop over added rows */ 395 row = im[k]; brow = row/bs; 396 #if defined(USE_PETSC_BOPT_g) 397 if (row < 0) SETERRQ(1,0,"Negative row"); 398 if (row >= a->m) SETERRQ(1,0,"Row too large"); 399 #endif 400 rp = aj + ai[brow]; 401 ap = aa + bs2*ai[brow]; 402 rmax = imax[brow]; 403 nrow = ailen[brow]; 404 low = 0; 405 for ( l=0; l<n; l++ ) { /* loop over added columns */ 406 #if defined(USE_PETSC_BOPT_g) 407 if (in[l] < 0) SETERRQ(1,0,"Negative column"); 408 if (in[l] >= a->n) SETERRQ(1,0,"Column too large"); 409 #endif 410 col = in[l]; bcol = col/bs; 411 ridx = row % bs; cidx = col % bs; 412 if (roworiented) { 413 value = *v++; 414 } else { 415 value = v[k + l*m]; 416 } 417 if (!sorted) low = 0; high = nrow; 418 while (high-low > 7) { 419 t = (low+high)/2; 420 if (rp[t] > bcol) high = t; 421 else low = t; 422 } 423 for ( i=low; i<high; i++ ) { 424 if (rp[i] > bcol) break; 425 if (rp[i] == bcol) { 426 bap = ap + bs2*i + bs*cidx + ridx; 427 if (is == ADD_VALUES) *bap += value; 428 else *bap = value; 429 goto noinsert1; 430 } 431 } 432 if (nonew == 1) goto noinsert1; 433 else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 434 if (nrow >= rmax) { 435 /* there is no extra room in row, therefore enlarge */ 436 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 437 Scalar *new_a; 438 439 if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 440 441 /* Malloc new storage space */ 442 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 443 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 444 new_j = (int *) (new_a + bs2*new_nz); 445 new_i = new_j + new_nz; 446 447 /* copy over old data into new slots */ 448 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 449 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 450 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); 451 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 452 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, 453 len*sizeof(int)); 454 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); 455 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 456 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), 457 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); 458 /* free up old matrix storage */ 459 PetscFree(a->a); 460 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 461 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 462 a->singlemalloc = 1; 463 464 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 465 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 466 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 467 a->maxnz += bs2*CHUNKSIZE; 468 a->reallocs++; 469 a->nz++; 470 } 471 N = nrow++ - 1; 472 /* shift up all the later entries in this row */ 473 for ( ii=N; ii>=i; ii-- ) { 474 rp[ii+1] = rp[ii]; 475 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 476 } 477 if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 478 rp[i] = bcol; 479 ap[bs2*i + bs*cidx + ridx] = value; 480 noinsert1:; 481 low = i; 482 } 483 ailen[brow] = nrow; 484 } 485 PetscFunctionReturn(0); 486 } 487 488 #undef __FUNC__ 489 #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ" 490 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 491 { 492 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 493 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 494 int *imax=a->imax,*ai=a->i,*ailen=a->ilen, roworiented=a->roworiented; 495 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval; 496 Scalar *ap,*value=v,*aa=a->a,*bap; 497 498 PetscFunctionBegin; 499 if (roworiented) { 500 stepval = (n-1)*bs; 501 } else { 502 stepval = (m-1)*bs; 503 } 504 for ( k=0; k<m; k++ ) { /* loop over added rows */ 505 row = im[k]; 506 #if defined(USE_PETSC_BOPT_g) 507 if (row < 0) SETERRQ(1,0,"Negative row"); 508 if (row >= a->mbs) SETERRQ(1,0,"Row too large"); 509 #endif 510 rp = aj + ai[row]; 511 ap = aa + bs2*ai[row]; 512 rmax = imax[row]; 513 nrow = ailen[row]; 514 low = 0; 515 for ( l=0; l<n; l++ ) { /* loop over added columns */ 516 #if defined(USE_PETSC_BOPT_g) 517 if (in[l] < 0) SETERRQ(1,0,"Negative column"); 518 if (in[l] >= a->nbs) SETERRQ(1,0,"Column too large"); 519 #endif 520 col = in[l]; 521 if (roworiented) { 522 value = v + k*(stepval+bs)*bs + l*bs; 523 } else { 524 value = v + l*(stepval+bs)*bs + k*bs; 525 } 526 if (!sorted) low = 0; high = nrow; 527 while (high-low > 7) { 528 t = (low+high)/2; 529 if (rp[t] > col) high = t; 530 else low = t; 531 } 532 for ( i=low; i<high; i++ ) { 533 if (rp[i] > col) break; 534 if (rp[i] == col) { 535 bap = ap + bs2*i; 536 if (roworiented) { 537 if (is == ADD_VALUES) { 538 for ( ii=0; ii<bs; ii++,value+=stepval ) { 539 for (jj=ii; jj<bs2; jj+=bs ) { 540 bap[jj] += *value++; 541 } 542 } 543 } else { 544 for ( ii=0; ii<bs; ii++,value+=stepval ) { 545 for (jj=ii; jj<bs2; jj+=bs ) { 546 bap[jj] = *value++; 547 } 548 } 549 } 550 } else { 551 if (is == ADD_VALUES) { 552 for ( ii=0; ii<bs; ii++,value+=stepval ) { 553 for (jj=0; jj<bs; jj++ ) { 554 *bap++ += *value++; 555 } 556 } 557 } else { 558 for ( ii=0; ii<bs; ii++,value+=stepval ) { 559 for (jj=0; jj<bs; jj++ ) { 560 *bap++ = *value++; 561 } 562 } 563 } 564 } 565 goto noinsert2; 566 } 567 } 568 if (nonew == 1) goto noinsert2; 569 else if (nonew == -1) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 570 if (nrow >= rmax) { 571 /* there is no extra room in row, therefore enlarge */ 572 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 573 Scalar *new_a; 574 575 if (nonew == -2) SETERRQ(1,0,"Inserting a new nonzero in the matrix"); 576 577 /* malloc new storage space */ 578 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 579 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 580 new_j = (int *) (new_a + bs2*new_nz); 581 new_i = new_j + new_nz; 582 583 /* copy over old data into new slots */ 584 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} 585 for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 586 PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int)); 587 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 588 PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int)); 589 PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(Scalar)); 590 PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 591 PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(Scalar)); 592 /* free up old matrix storage */ 593 PetscFree(a->a); 594 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 595 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 596 a->singlemalloc = 1; 597 598 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 599 rmax = imax[row] = imax[row] + CHUNKSIZE; 600 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 601 a->maxnz += bs2*CHUNKSIZE; 602 a->reallocs++; 603 a->nz++; 604 } 605 N = nrow++ - 1; 606 /* shift up all the later entries in this row */ 607 for ( ii=N; ii>=i; ii-- ) { 608 rp[ii+1] = rp[ii]; 609 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 610 } 611 if (N >= i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 612 rp[i] = col; 613 bap = ap + bs2*i; 614 if (roworiented) { 615 for ( ii=0; ii<bs; ii++,value+=stepval) { 616 for (jj=ii; jj<bs2; jj+=bs ) { 617 bap[jj] = *value++; 618 } 619 } 620 } else { 621 for ( ii=0; ii<bs; ii++,value+=stepval ) { 622 for (jj=0; jj<bs; jj++ ) { 623 *bap++ = *value++; 624 } 625 } 626 } 627 noinsert2:; 628 low = i; 629 } 630 ailen[row] = nrow; 631 } 632 PetscFunctionReturn(0); 633 } 634 635 #undef __FUNC__ 636 #define __FUNC__ "MatGetSize_SeqBAIJ" 637 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 638 { 639 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 640 641 PetscFunctionBegin; 642 if (m) *m = a->m; 643 if (n) *n = a->n; 644 PetscFunctionReturn(0); 645 } 646 647 #undef __FUNC__ 648 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ" 649 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 650 { 651 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 652 653 PetscFunctionBegin; 654 *m = 0; *n = a->m; 655 PetscFunctionReturn(0); 656 } 657 658 #undef __FUNC__ 659 #define __FUNC__ "MatGetRow_SeqBAIJ" 660 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 661 { 662 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 663 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 664 Scalar *aa,*v_i,*aa_i; 665 666 PetscFunctionBegin; 667 bs = a->bs; 668 ai = a->i; 669 aj = a->j; 670 aa = a->a; 671 bs2 = a->bs2; 672 673 if (row < 0 || row >= a->m) SETERRQ(1,0,"Row out of range"); 674 675 bn = row/bs; /* Block number */ 676 bp = row % bs; /* Block Position */ 677 M = ai[bn+1] - ai[bn]; 678 *nz = bs*M; 679 680 if (v) { 681 *v = 0; 682 if (*nz) { 683 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 684 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 685 v_i = *v + i*bs; 686 aa_i = aa + bs2*(ai[bn] + i); 687 for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 688 } 689 } 690 } 691 692 if (idx) { 693 *idx = 0; 694 if (*nz) { 695 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 696 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 697 idx_i = *idx + i*bs; 698 itmp = bs*aj[ai[bn] + i]; 699 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 700 } 701 } 702 } 703 PetscFunctionReturn(0); 704 } 705 706 #undef __FUNC__ 707 #define __FUNC__ "MatRestoreRow_SeqBAIJ" 708 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 709 { 710 PetscFunctionBegin; 711 if (idx) {if (*idx) PetscFree(*idx);} 712 if (v) {if (*v) PetscFree(*v);} 713 PetscFunctionReturn(0); 714 } 715 716 #undef __FUNC__ 717 #define __FUNC__ "MatTranspose_SeqBAIJ" 718 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 719 { 720 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 721 Mat C; 722 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 723 int *rows,*cols,bs2=a->bs2; 724 Scalar *array=a->a; 725 726 PetscFunctionBegin; 727 if (B==PETSC_NULL && mbs!=nbs) SETERRQ(1,0,"Square matrix only for in-place"); 728 col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 729 PetscMemzero(col,(1+nbs)*sizeof(int)); 730 731 for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 732 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 733 PetscFree(col); 734 rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 735 cols = rows + bs; 736 for ( i=0; i<mbs; i++ ) { 737 cols[0] = i*bs; 738 for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 739 len = ai[i+1] - ai[i]; 740 for ( j=0; j<len; j++ ) { 741 rows[0] = (*aj++)*bs; 742 for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 743 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 744 array += bs2; 745 } 746 } 747 PetscFree(rows); 748 749 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 750 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 751 752 if (B != PETSC_NULL) { 753 *B = C; 754 } else { 755 /* This isn't really an in-place transpose */ 756 PetscFree(a->a); 757 if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 758 if (a->diag) PetscFree(a->diag); 759 if (a->ilen) PetscFree(a->ilen); 760 if (a->imax) PetscFree(a->imax); 761 if (a->solve_work) PetscFree(a->solve_work); 762 PetscFree(a); 763 PetscMemcpy(A,C,sizeof(struct _p_Mat)); 764 PetscHeaderDestroy(C); 765 } 766 PetscFunctionReturn(0); 767 } 768 769 770 #undef __FUNC__ 771 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ" 772 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 773 { 774 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 775 int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 776 int m = a->m,*ip, N, *ailen = a->ilen; 777 int mbs = a->mbs, bs2 = a->bs2,rmax = 0; 778 Scalar *aa = a->a, *ap; 779 780 PetscFunctionBegin; 781 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 782 783 if (m) rmax = ailen[0]; 784 for ( i=1; i<mbs; i++ ) { 785 /* move each row back by the amount of empty slots (fshift) before it*/ 786 fshift += imax[i-1] - ailen[i-1]; 787 rmax = PetscMax(rmax,ailen[i]); 788 if (fshift) { 789 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 790 N = ailen[i]; 791 for ( j=0; j<N; j++ ) { 792 ip[j-fshift] = ip[j]; 793 PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 794 } 795 } 796 ai[i] = ai[i-1] + ailen[i-1]; 797 } 798 if (mbs) { 799 fshift += imax[mbs-1] - ailen[mbs-1]; 800 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 801 } 802 /* reset ilen and imax for each row */ 803 for ( i=0; i<mbs; i++ ) { 804 ailen[i] = imax[i] = ai[i+1] - ai[i]; 805 } 806 a->nz = ai[mbs]; 807 808 /* diagonals may have moved, so kill the diagonal pointers */ 809 if (fshift && a->diag) { 810 PetscFree(a->diag); 811 PLogObjectMemory(A,-(m+1)*sizeof(int)); 812 a->diag = 0; 813 } 814 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 815 m,a->n,a->bs,fshift*bs2,a->nz*bs2); 816 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 817 a->reallocs); 818 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 819 a->reallocs = 0; 820 A->info.nz_unneeded = (double)fshift*bs2; 821 822 PetscFunctionReturn(0); 823 } 824 825 #undef __FUNC__ 826 #define __FUNC__ "MatZeroEntries_SeqBAIJ" 827 int MatZeroEntries_SeqBAIJ(Mat A) 828 { 829 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 830 831 PetscFunctionBegin; 832 PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar)); 833 PetscFunctionReturn(0); 834 } 835 836 #undef __FUNC__ 837 #define __FUNC__ "MatDestroy_SeqBAIJ" 838 int MatDestroy_SeqBAIJ(PetscObject obj) 839 { 840 Mat A = (Mat) obj; 841 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 842 843 #if defined(USE_PETSC_LOG) 844 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 845 #endif 846 PetscFree(a->a); 847 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 848 if (a->diag) PetscFree(a->diag); 849 if (a->ilen) PetscFree(a->ilen); 850 if (a->imax) PetscFree(a->imax); 851 if (a->solve_work) PetscFree(a->solve_work); 852 if (a->mult_work) PetscFree(a->mult_work); 853 PetscFree(a); 854 PLogObjectDestroy(A); 855 PetscHeaderDestroy(A); 856 PetscFunctionReturn(0); 857 } 858 859 #undef __FUNC__ 860 #define __FUNC__ "MatSetOption_SeqBAIJ" 861 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 862 { 863 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 864 865 PetscFunctionBegin; 866 if (op == MAT_ROW_ORIENTED) a->roworiented = 1; 867 else if (op == MAT_COLUMN_ORIENTED) a->roworiented = 0; 868 else if (op == MAT_COLUMNS_SORTED) a->sorted = 1; 869 else if (op == MAT_COLUMNS_UNSORTED) a->sorted = 0; 870 else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 871 else if (op == MAT_NEW_NONZERO_LOCATION_ERROR) a->nonew = -1; 872 else if (op == MAT_NEW_NONZERO_ALLOCATION_ERROR) a->nonew = -2; 873 else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 874 else if (op == MAT_ROWS_SORTED || 875 op == MAT_ROWS_UNSORTED || 876 op == MAT_SYMMETRIC || 877 op == MAT_STRUCTURALLY_SYMMETRIC || 878 op == MAT_YES_NEW_DIAGONALS || 879 op == MAT_IGNORE_OFF_PROC_ENTRIES) { 880 PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 881 } else if (op == MAT_NO_NEW_DIAGONALS) { 882 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 883 } else { 884 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 885 } 886 PetscFunctionReturn(0); 887 } 888 889 890 /* -------------------------------------------------------*/ 891 /* Should check that shapes of vectors and matrices match */ 892 /* -------------------------------------------------------*/ 893 #include "pinclude/plapack.h" 894 895 #undef __FUNC__ 896 #define __FUNC__ "MatMult_SeqBAIJ_1" 897 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz) 898 { 899 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 900 register Scalar *x,*z,*v,sum; 901 int mbs=a->mbs,i,*idx,*ii,n; 902 903 PetscFunctionBegin; 904 VecGetArray_Fast(xx,x); 905 VecGetArray_Fast(zz,z); 906 907 idx = a->j; 908 v = a->a; 909 ii = a->i; 910 911 for ( i=0; i<mbs; i++ ) { 912 n = ii[1] - ii[0]; ii++; 913 sum = 0.0; 914 while (n--) sum += *v++ * x[*idx++]; 915 z[i] = sum; 916 } 917 VecRestoreArray_Fast(xx,x); 918 VecRestoreArray_Fast(zz,z); 919 PLogFlops(2*a->nz - a->m); 920 PetscFunctionReturn(0); 921 } 922 923 #undef __FUNC__ 924 #define __FUNC__ "MatMult_SeqBAIJ_2" 925 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz) 926 { 927 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 928 register Scalar *x,*z,*v,*xb,sum1,sum2; 929 register Scalar x1,x2; 930 int mbs=a->mbs,i,*idx,*ii,j,n; 931 932 PetscFunctionBegin; 933 VecGetArray_Fast(xx,x); 934 VecGetArray_Fast(zz,z); 935 936 idx = a->j; 937 v = a->a; 938 ii = a->i; 939 940 for ( i=0; i<mbs; i++ ) { 941 n = ii[1] - ii[0]; ii++; 942 sum1 = 0.0; sum2 = 0.0; 943 for ( j=0; j<n; j++ ) { 944 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 945 sum1 += v[0]*x1 + v[2]*x2; 946 sum2 += v[1]*x1 + v[3]*x2; 947 v += 4; 948 } 949 z[0] = sum1; z[1] = sum2; 950 z += 2; 951 } 952 VecRestoreArray_Fast(xx,x); 953 VecRestoreArray_Fast(zz,z); 954 PLogFlops(4*a->nz - a->m); 955 PetscFunctionReturn(0); 956 } 957 958 #undef __FUNC__ 959 #define __FUNC__ "MatMult_SeqBAIJ_3" 960 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz) 961 { 962 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 963 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 964 int mbs=a->mbs,i,*idx,*ii,j,n; 965 966 PetscFunctionBegin; 967 VecGetArray_Fast(xx,x); 968 VecGetArray_Fast(zz,z); 969 970 idx = a->j; 971 v = a->a; 972 ii = a->i; 973 974 for ( i=0; i<mbs; i++ ) { 975 n = ii[1] - ii[0]; ii++; 976 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 977 for ( j=0; j<n; j++ ) { 978 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 979 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 980 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 981 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 982 v += 9; 983 } 984 z[0] = sum1; z[1] = sum2; z[2] = sum3; 985 z += 3; 986 } 987 VecRestoreArray_Fast(xx,x); 988 VecRestoreArray_Fast(zz,z); 989 PLogFlops(18*a->nz - a->m); 990 PetscFunctionReturn(0); 991 } 992 993 #undef __FUNC__ 994 #define __FUNC__ "MatMult_SeqBAIJ_4" 995 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz) 996 { 997 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 998 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4; 999 int mbs=a->mbs,i,*idx,*ii,j,n; 1000 1001 PetscFunctionBegin; 1002 VecGetArray_Fast(xx,x); 1003 VecGetArray_Fast(zz,z); 1004 1005 idx = a->j; 1006 v = a->a; 1007 ii = a->i; 1008 1009 for ( i=0; i<mbs; i++ ) { 1010 n = ii[1] - ii[0]; ii++; 1011 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 1012 for ( j=0; j<n; j++ ) { 1013 xb = x + 4*(*idx++); 1014 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1015 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 1016 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 1017 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 1018 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 1019 v += 16; 1020 } 1021 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 1022 z += 4; 1023 } 1024 VecRestoreArray_Fast(xx,x); 1025 VecRestoreArray_Fast(zz,z); 1026 PLogFlops(32*a->nz - a->m); 1027 PetscFunctionReturn(0); 1028 } 1029 1030 #undef __FUNC__ 1031 #define __FUNC__ "MatMult_SeqBAIJ_5" 1032 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz) 1033 { 1034 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1035 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5; 1036 int mbs=a->mbs,i,*idx,*ii,j,n; 1037 1038 VecGetArray_Fast(xx,x); 1039 VecGetArray_Fast(zz,z); 1040 1041 idx = a->j; 1042 v = a->a; 1043 ii = a->i; 1044 1045 for ( i=0; i<mbs; i++ ) { 1046 n = ii[1] - ii[0]; ii++; 1047 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 1048 for ( j=0; j<n; j++ ) { 1049 xb = x + 5*(*idx++); 1050 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 1051 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1052 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1053 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1054 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1055 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 1056 v += 25; 1057 } 1058 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 1059 z += 5; 1060 } 1061 VecRestoreArray_Fast(xx,x); 1062 VecRestoreArray_Fast(zz,z); 1063 PLogFlops(50*a->nz - a->m); 1064 PetscFunctionReturn(0); 1065 } 1066 1067 #undef __FUNC__ 1068 #define __FUNC__ "MatMult_SeqBAIJ_7" 1069 static int MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz) 1070 { 1071 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1072 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 1073 register Scalar x1,x2,x3,x4,x5,x6,x7; 1074 int mbs=a->mbs,i,*idx,*ii,j,n; 1075 1076 VecGetArray_Fast(xx,x); 1077 VecGetArray_Fast(zz,z); 1078 1079 idx = a->j; 1080 v = a->a; 1081 ii = a->i; 1082 1083 for ( i=0; i<mbs; i++ ) { 1084 n = ii[1] - ii[0]; ii++; 1085 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0; 1086 for ( j=0; j<n; j++ ) { 1087 xb = x + 7*(*idx++); 1088 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 1089 sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 1090 sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 1091 sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 1092 sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 1093 sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 1094 sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 1095 sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 1096 v += 49; 1097 } 1098 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 1099 z += 7; 1100 } 1101 1102 VecRestoreArray_Fast(xx,x); 1103 VecRestoreArray_Fast(zz,z); 1104 PLogFlops(98*a->nz - a->m); 1105 PetscFunctionReturn(0); 1106 } 1107 1108 #undef __FUNC__ 1109 #define __FUNC__ "MatMult_SeqBAIJ_N" 1110 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz) 1111 { 1112 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1113 register Scalar *x,*z,*v,*xb; 1114 Scalar _DOne = 1.0,*work,*workt,_DZero = 0.0; 1115 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2; 1116 int _One = 1,ncols,k; 1117 1118 PetscFunctionBegin; 1119 VecGetArray_Fast(xx,x); 1120 VecGetArray_Fast(zz,z); 1121 1122 idx = a->j; 1123 v = a->a; 1124 ii = a->i; 1125 1126 1127 if (!a->mult_work) { 1128 k = PetscMax(a->m,a->n); 1129 a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work); 1130 } 1131 work = a->mult_work; 1132 for ( i=0; i<mbs; i++ ) { 1133 n = ii[1] - ii[0]; ii++; 1134 ncols = n*bs; 1135 workt = work; 1136 for ( j=0; j<n; j++ ) { 1137 xb = x + bs*(*idx++); 1138 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1139 workt += bs; 1140 } 1141 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); 1142 v += n*bs2; 1143 z += bs; 1144 } 1145 VecRestoreArray_Fast(xx,x); 1146 VecRestoreArray_Fast(zz,z); 1147 PLogFlops(2*a->nz*bs2 - a->m); 1148 PetscFunctionReturn(0); 1149 } 1150 1151 #undef __FUNC__ 1152 #define __FUNC__ "MatMultAdd_SeqBAIJ_1" 1153 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 1154 { 1155 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1156 register Scalar *x,*y,*z,*v,sum; 1157 int mbs=a->mbs,i,*idx,*ii,n; 1158 1159 PetscFunctionBegin; 1160 VecGetArray_Fast(xx,x); 1161 VecGetArray_Fast(yy,y); 1162 VecGetArray_Fast(zz,z); 1163 1164 idx = a->j; 1165 v = a->a; 1166 ii = a->i; 1167 1168 for ( i=0; i<mbs; i++ ) { 1169 n = ii[1] - ii[0]; ii++; 1170 sum = y[i]; 1171 while (n--) sum += *v++ * x[*idx++]; 1172 z[i] = sum; 1173 } 1174 VecRestoreArray_Fast(xx,x); 1175 VecRestoreArray_Fast(yy,y); 1176 VecRestoreArray_Fast(zz,z); 1177 PLogFlops(2*a->nz); 1178 PetscFunctionReturn(0); 1179 } 1180 1181 #undef __FUNC__ 1182 #define __FUNC__ "MatMultAdd_SeqBAIJ_2" 1183 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 1184 { 1185 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1186 register Scalar *x,*y,*z,*v,*xb,sum1,sum2; 1187 register Scalar x1,x2; 1188 int mbs=a->mbs,i,*idx,*ii,j,n; 1189 1190 PetscFunctionBegin; 1191 VecGetArray_Fast(xx,x); 1192 VecGetArray_Fast(yy,y); 1193 VecGetArray_Fast(zz,z); 1194 1195 idx = a->j; 1196 v = a->a; 1197 ii = a->i; 1198 1199 for ( i=0; i<mbs; i++ ) { 1200 n = ii[1] - ii[0]; ii++; 1201 sum1 = y[0]; sum2 = y[1]; 1202 for ( j=0; j<n; j++ ) { 1203 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 1204 sum1 += v[0]*x1 + v[2]*x2; 1205 sum2 += v[1]*x1 + v[3]*x2; 1206 v += 4; 1207 } 1208 z[0] = sum1; z[1] = sum2; 1209 z += 2; y += 2; 1210 } 1211 VecRestoreArray_Fast(xx,x); 1212 VecRestoreArray_Fast(yy,y); 1213 VecRestoreArray_Fast(zz,z); 1214 PLogFlops(4*a->nz); 1215 PetscFunctionReturn(0); 1216 } 1217 1218 #undef __FUNC__ 1219 #define __FUNC__ "MatMultAdd_SeqBAIJ_3" 1220 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 1221 { 1222 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1223 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 1224 int mbs=a->mbs,i,*idx,*ii,j,n; 1225 1226 PetscFunctionBegin; 1227 VecGetArray_Fast(xx,x); 1228 VecGetArray_Fast(yy,y); 1229 VecGetArray_Fast(zz,z); 1230 1231 idx = a->j; 1232 v = a->a; 1233 ii = a->i; 1234 1235 for ( i=0; i<mbs; i++ ) { 1236 n = ii[1] - ii[0]; ii++; 1237 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; 1238 for ( j=0; j<n; j++ ) { 1239 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1240 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 1241 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 1242 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 1243 v += 9; 1244 } 1245 z[0] = sum1; z[1] = sum2; z[2] = sum3; 1246 z += 3; y += 3; 1247 } 1248 VecRestoreArray_Fast(xx,x); 1249 VecRestoreArray_Fast(yy,y); 1250 VecRestoreArray_Fast(zz,z); 1251 PLogFlops(18*a->nz); 1252 PetscFunctionReturn(0); 1253 } 1254 1255 #undef __FUNC__ 1256 #define __FUNC__ "MatMultAdd_SeqBAIJ_4" 1257 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 1258 { 1259 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1260 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4; 1261 int mbs=a->mbs,i,*idx,*ii; 1262 int j,n; 1263 1264 PetscFunctionBegin; 1265 VecGetArray_Fast(xx,x); 1266 VecGetArray_Fast(yy,y); 1267 VecGetArray_Fast(zz,z); 1268 1269 idx = a->j; 1270 v = a->a; 1271 ii = a->i; 1272 1273 for ( i=0; i<mbs; i++ ) { 1274 n = ii[1] - ii[0]; ii++; 1275 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; 1276 for ( j=0; j<n; j++ ) { 1277 xb = x + 4*(*idx++); 1278 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1279 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 1280 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 1281 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 1282 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 1283 v += 16; 1284 } 1285 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 1286 z += 4; y += 4; 1287 } 1288 VecRestoreArray_Fast(xx,x); 1289 VecRestoreArray_Fast(yy,y); 1290 VecRestoreArray_Fast(zz,z); 1291 PLogFlops(32*a->nz); 1292 PetscFunctionReturn(0); 1293 } 1294 1295 #undef __FUNC__ 1296 #define __FUNC__ "MatMultAdd_SeqBAIJ_5" 1297 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 1298 { 1299 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1300 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5; 1301 int mbs=a->mbs,i,*idx,*ii,j,n; 1302 1303 PetscFunctionBegin; 1304 VecGetArray_Fast(xx,x); 1305 VecGetArray_Fast(yy,y); 1306 VecGetArray_Fast(zz,z); 1307 1308 idx = a->j; 1309 v = a->a; 1310 ii = a->i; 1311 1312 for ( i=0; i<mbs; i++ ) { 1313 n = ii[1] - ii[0]; ii++; 1314 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; 1315 for ( j=0; j<n; j++ ) { 1316 xb = x + 5*(*idx++); 1317 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 1318 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1319 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1320 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1321 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1322 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 1323 v += 25; 1324 } 1325 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 1326 z += 5; y += 5; 1327 } 1328 VecRestoreArray_Fast(xx,x); 1329 VecRestoreArray_Fast(yy,y); 1330 VecRestoreArray_Fast(zz,z); 1331 PLogFlops(50*a->nz); 1332 PetscFunctionReturn(0); 1333 } 1334 1335 #undef __FUNC__ 1336 #define __FUNC__ "MatMultAdd_SeqBAIJ_7" 1337 static int MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1338 { 1339 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1340 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 1341 register Scalar x1,x2,x3,x4,x5,x6,x7; 1342 int mbs=a->mbs,i,*idx,*ii,j,n; 1343 1344 PetscFunctionBegin; 1345 VecGetArray_Fast(xx,x); 1346 VecGetArray_Fast(yy,y); 1347 VecGetArray_Fast(zz,z); 1348 1349 idx = a->j; 1350 v = a->a; 1351 ii = a->i; 1352 1353 for ( i=0; i<mbs; i++ ) { 1354 n = ii[1] - ii[0]; ii++; 1355 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6]; 1356 for ( j=0; j<n; j++ ) { 1357 xb = x + 7*(*idx++); 1358 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 1359 sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 1360 sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 1361 sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 1362 sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 1363 sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 1364 sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 1365 sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 1366 v += 49; 1367 } 1368 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 1369 z += 7; y += 7; 1370 } 1371 VecRestoreArray_Fast(xx,x); 1372 VecRestoreArray_Fast(yy,y); 1373 VecRestoreArray_Fast(zz,z); 1374 PLogFlops(98*a->nz); 1375 PetscFunctionReturn(0); 1376 } 1377 1378 1379 #undef __FUNC__ 1380 #define __FUNC__ "MatMultAdd_SeqBAIJ_N" 1381 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 1382 { 1383 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1384 register Scalar *x,*z,*v,*xb; 1385 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr; 1386 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1387 1388 PetscFunctionBegin; 1389 if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1390 1391 VecGetArray_Fast(xx,x); 1392 VecGetArray_Fast(zz,z); 1393 1394 idx = a->j; 1395 v = a->a; 1396 ii = a->i; 1397 1398 1399 if (!a->mult_work) { 1400 k = PetscMax(a->m,a->n); 1401 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work); 1402 } 1403 work = a->mult_work; 1404 for ( i=0; i<mbs; i++ ) { 1405 n = ii[1] - ii[0]; ii++; 1406 ncols = n*bs; 1407 workt = work; 1408 for ( j=0; j<n; j++ ) { 1409 xb = x + bs*(*idx++); 1410 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1411 workt += bs; 1412 } 1413 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); 1414 v += n*bs2; 1415 z += bs; 1416 } 1417 VecRestoreArray_Fast(xx,x); 1418 VecRestoreArray_Fast(zz,z); 1419 PLogFlops(2*a->nz*bs2 ); 1420 PetscFunctionReturn(0); 1421 } 1422 1423 #undef __FUNC__ 1424 #define __FUNC__ "MatMultTrans_SeqBAIJ" 1425 int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz) 1426 { 1427 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1428 Scalar *xg,*zg,*zb; 1429 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1430 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1431 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1432 1433 1434 PetscFunctionBegin; 1435 VecGetArray_Fast(xx,xg); x = xg; 1436 VecGetArray_Fast(zz,zg); z = zg; 1437 PetscMemzero(z,N*sizeof(Scalar)); 1438 1439 idx = a->j; 1440 v = a->a; 1441 ii = a->i; 1442 1443 switch (bs) { 1444 case 1: 1445 for ( i=0; i<mbs; i++ ) { 1446 n = ii[1] - ii[0]; ii++; 1447 xb = x + i; x1 = xb[0]; 1448 ib = idx + ai[i]; 1449 for ( j=0; j<n; j++ ) { 1450 rval = ib[j]; 1451 z[rval] += *v++ * x1; 1452 } 1453 } 1454 break; 1455 case 2: 1456 for ( i=0; i<mbs; i++ ) { 1457 n = ii[1] - ii[0]; ii++; 1458 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1459 ib = idx + ai[i]; 1460 for ( j=0; j<n; j++ ) { 1461 rval = ib[j]*2; 1462 z[rval++] += v[0]*x1 + v[1]*x2; 1463 z[rval++] += v[2]*x1 + v[3]*x2; 1464 v += 4; 1465 } 1466 } 1467 break; 1468 case 3: 1469 for ( i=0; i<mbs; i++ ) { 1470 n = ii[1] - ii[0]; ii++; 1471 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1472 ib = idx + ai[i]; 1473 for ( j=0; j<n; j++ ) { 1474 rval = ib[j]*3; 1475 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1476 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1477 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1478 v += 9; 1479 } 1480 } 1481 break; 1482 case 4: 1483 for ( i=0; i<mbs; i++ ) { 1484 n = ii[1] - ii[0]; ii++; 1485 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1486 ib = idx + ai[i]; 1487 for ( j=0; j<n; j++ ) { 1488 rval = ib[j]*4; 1489 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1490 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1491 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1492 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1493 v += 16; 1494 } 1495 } 1496 break; 1497 case 5: 1498 for ( i=0; i<mbs; i++ ) { 1499 n = ii[1] - ii[0]; ii++; 1500 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1501 x4 = xb[3]; x5 = xb[4]; 1502 ib = idx + ai[i]; 1503 for ( j=0; j<n; j++ ) { 1504 rval = ib[j]*5; 1505 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1506 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1507 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1508 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1509 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1510 v += 25; 1511 } 1512 } 1513 break; 1514 /* block sizes larger then 5 by 5 are handled by BLAS */ 1515 default: { 1516 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1517 if (!a->mult_work) { 1518 k = PetscMax(a->m,a->n); 1519 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1520 CHKPTRQ(a->mult_work); 1521 } 1522 work = a->mult_work; 1523 for ( i=0; i<mbs; i++ ) { 1524 n = ii[1] - ii[0]; ii++; 1525 ncols = n*bs; 1526 PetscMemzero(work,ncols*sizeof(Scalar)); 1527 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1528 v += n*bs2; 1529 x += bs; 1530 workt = work; 1531 for ( j=0; j<n; j++ ) { 1532 zb = z + bs*(*idx++); 1533 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1534 workt += bs; 1535 } 1536 } 1537 } 1538 } 1539 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1540 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1541 PLogFlops(2*a->nz*a->bs2 - a->n); 1542 PetscFunctionReturn(0); 1543 } 1544 1545 #undef __FUNC__ 1546 #define __FUNC__ "MatMultTransAdd_SeqBAIJ" 1547 int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1548 { 1549 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1550 Scalar *xg,*zg,*zb; 1551 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1552 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1553 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1554 1555 PetscFunctionBegin; 1556 VecGetArray_Fast(xx,xg); x = xg; 1557 VecGetArray_Fast(zz,zg); z = zg; 1558 1559 if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1560 else PetscMemzero(z,N*sizeof(Scalar)); 1561 1562 idx = a->j; 1563 v = a->a; 1564 ii = a->i; 1565 1566 switch (bs) { 1567 case 1: 1568 for ( i=0; i<mbs; i++ ) { 1569 n = ii[1] - ii[0]; ii++; 1570 xb = x + i; x1 = xb[0]; 1571 ib = idx + ai[i]; 1572 for ( j=0; j<n; j++ ) { 1573 rval = ib[j]; 1574 z[rval] += *v++ * x1; 1575 } 1576 } 1577 break; 1578 case 2: 1579 for ( i=0; i<mbs; i++ ) { 1580 n = ii[1] - ii[0]; ii++; 1581 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1582 ib = idx + ai[i]; 1583 for ( j=0; j<n; j++ ) { 1584 rval = ib[j]*2; 1585 z[rval++] += v[0]*x1 + v[1]*x2; 1586 z[rval++] += v[2]*x1 + v[3]*x2; 1587 v += 4; 1588 } 1589 } 1590 break; 1591 case 3: 1592 for ( i=0; i<mbs; i++ ) { 1593 n = ii[1] - ii[0]; ii++; 1594 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1595 ib = idx + ai[i]; 1596 for ( j=0; j<n; j++ ) { 1597 rval = ib[j]*3; 1598 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1599 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1600 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1601 v += 9; 1602 } 1603 } 1604 break; 1605 case 4: 1606 for ( i=0; i<mbs; i++ ) { 1607 n = ii[1] - ii[0]; ii++; 1608 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1609 ib = idx + ai[i]; 1610 for ( j=0; j<n; j++ ) { 1611 rval = ib[j]*4; 1612 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1613 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1614 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1615 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1616 v += 16; 1617 } 1618 } 1619 break; 1620 case 5: 1621 for ( i=0; i<mbs; i++ ) { 1622 n = ii[1] - ii[0]; ii++; 1623 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1624 x4 = xb[3]; x5 = xb[4]; 1625 ib = idx + ai[i]; 1626 for ( j=0; j<n; j++ ) { 1627 rval = ib[j]*5; 1628 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1629 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1630 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1631 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1632 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1633 v += 25; 1634 } 1635 } 1636 break; 1637 /* block sizes larger then 5 by 5 are handled by BLAS */ 1638 default: { 1639 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1640 if (!a->mult_work) { 1641 k = PetscMax(a->m,a->n); 1642 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1643 CHKPTRQ(a->mult_work); 1644 } 1645 work = a->mult_work; 1646 for ( i=0; i<mbs; i++ ) { 1647 n = ii[1] - ii[0]; ii++; 1648 ncols = n*bs; 1649 PetscMemzero(work,ncols*sizeof(Scalar)); 1650 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1651 v += n*bs2; 1652 x += bs; 1653 workt = work; 1654 for ( j=0; j<n; j++ ) { 1655 zb = z + bs*(*idx++); 1656 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1657 workt += bs; 1658 } 1659 } 1660 } 1661 } 1662 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1663 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1664 PLogFlops(2*a->nz*a->bs2); 1665 PetscFunctionReturn(0); 1666 } 1667 1668 #undef __FUNC__ 1669 #define __FUNC__ "MatGetInfo_SeqBAIJ" 1670 int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info) 1671 { 1672 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1673 1674 PetscFunctionBegin; 1675 info->rows_global = (double)a->m; 1676 info->columns_global = (double)a->n; 1677 info->rows_local = (double)a->m; 1678 info->columns_local = (double)a->n; 1679 info->block_size = a->bs2; 1680 info->nz_allocated = a->maxnz; 1681 info->nz_used = a->bs2*a->nz; 1682 info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 1683 /* if (info->nz_unneeded != A->info.nz_unneeded) 1684 printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */ 1685 info->assemblies = A->num_ass; 1686 info->mallocs = a->reallocs; 1687 info->memory = A->mem; 1688 if (A->factor) { 1689 info->fill_ratio_given = A->info.fill_ratio_given; 1690 info->fill_ratio_needed = A->info.fill_ratio_needed; 1691 info->factor_mallocs = A->info.factor_mallocs; 1692 } else { 1693 info->fill_ratio_given = 0; 1694 info->fill_ratio_needed = 0; 1695 info->factor_mallocs = 0; 1696 } 1697 PetscFunctionReturn(0); 1698 } 1699 1700 #undef __FUNC__ 1701 #define __FUNC__ "MatEqual_SeqBAIJ" 1702 int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 1703 { 1704 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 1705 1706 PetscFunctionBegin; 1707 if (B->type !=MATSEQBAIJ)SETERRQ(1,0,"Matrices must be same type"); 1708 1709 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1710 if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| (a->nz != b->nz)) { 1711 *flg = PETSC_FALSE; PetscFunctionReturn(0); 1712 } 1713 1714 /* if the a->i are the same */ 1715 if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 1716 *flg = PETSC_FALSE; PetscFunctionReturn(0); 1717 } 1718 1719 /* if a->j are the same */ 1720 if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 1721 *flg = PETSC_FALSE; PetscFunctionReturn(0); 1722 } 1723 1724 /* if a->a are the same */ 1725 if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 1726 *flg = PETSC_FALSE; PetscFunctionReturn(0); 1727 } 1728 *flg = PETSC_TRUE; 1729 PetscFunctionReturn(0); 1730 1731 } 1732 1733 #undef __FUNC__ 1734 #define __FUNC__ "MatGetDiagonal_SeqBAIJ" 1735 int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 1736 { 1737 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1738 int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 1739 Scalar *x, zero = 0.0,*aa,*aa_j; 1740 1741 PetscFunctionBegin; 1742 if (A->factor) SETERRQ(1,0,"Not for factored matrix"); 1743 bs = a->bs; 1744 aa = a->a; 1745 ai = a->i; 1746 aj = a->j; 1747 ambs = a->mbs; 1748 bs2 = a->bs2; 1749 1750 VecSet(&zero,v); 1751 VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n); 1752 if (n != a->m) SETERRQ(1,0,"Nonconforming matrix and vector"); 1753 for ( i=0; i<ambs; i++ ) { 1754 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1755 if (aj[j] == i) { 1756 row = i*bs; 1757 aa_j = aa+j*bs2; 1758 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 1759 break; 1760 } 1761 } 1762 } 1763 PetscFunctionReturn(0); 1764 } 1765 1766 #undef __FUNC__ 1767 #define __FUNC__ "MatDiagonalScale_SeqBAIJ" 1768 int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 1769 { 1770 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1771 Scalar *l,*r,x,*v,*aa,*li,*ri; 1772 int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 1773 1774 PetscFunctionBegin; 1775 ai = a->i; 1776 aj = a->j; 1777 aa = a->a; 1778 m = a->m; 1779 n = a->n; 1780 bs = a->bs; 1781 mbs = a->mbs; 1782 bs2 = a->bs2; 1783 if (ll) { 1784 VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm); 1785 if (lm != m) SETERRQ(1,0,"Left scaling vector wrong length"); 1786 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1787 M = ai[i+1] - ai[i]; 1788 li = l + i*bs; 1789 v = aa + bs2*ai[i]; 1790 for ( j=0; j<M; j++ ) { /* for each block */ 1791 for ( k=0; k<bs2; k++ ) { 1792 (*v++) *= li[k%bs]; 1793 } 1794 } 1795 } 1796 } 1797 1798 if (rr) { 1799 VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn); 1800 if (rn != n) SETERRQ(1,0,"Right scaling vector wrong length"); 1801 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1802 M = ai[i+1] - ai[i]; 1803 v = aa + bs2*ai[i]; 1804 for ( j=0; j<M; j++ ) { /* for each block */ 1805 ri = r + bs*aj[ai[i]+j]; 1806 for ( k=0; k<bs; k++ ) { 1807 x = ri[k]; 1808 for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 1809 } 1810 } 1811 } 1812 } 1813 PetscFunctionReturn(0); 1814 } 1815 1816 1817 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 1818 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 1819 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1820 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*); 1821 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**); 1822 1823 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 1824 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 1825 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 1826 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 1827 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 1828 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 1829 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec); 1830 1831 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 1832 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 1833 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 1834 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 1835 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 1836 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 1837 1838 #undef __FUNC__ 1839 #define __FUNC__ "MatNorm_SeqBAIJ" 1840 int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 1841 { 1842 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1843 Scalar *v = a->a; 1844 double sum = 0.0; 1845 int i,nz=a->nz,bs2=a->bs2; 1846 1847 PetscFunctionBegin; 1848 if (type == NORM_FROBENIUS) { 1849 for (i=0; i< bs2*nz; i++ ) { 1850 #if defined(USE_PETSC_COMPLEX) 1851 sum += real(conj(*v)*(*v)); v++; 1852 #else 1853 sum += (*v)*(*v); v++; 1854 #endif 1855 } 1856 *norm = sqrt(sum); 1857 } 1858 else { 1859 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 1860 } 1861 PetscFunctionReturn(0); 1862 } 1863 1864 extern int MatSolve_SeqBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 1865 /* 1866 note: This can only work for identity for row and col. It would 1867 be good to check this and otherwise generate an error. 1868 */ 1869 #undef __FUNC__ 1870 #define __FUNC__ "MatILUFactor_SeqBAIJ" 1871 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 1872 { 1873 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1874 Mat outA; 1875 int ierr; 1876 1877 PetscFunctionBegin; 1878 if (fill != 0) SETERRQ(1,0,"Only fill=0 supported"); 1879 1880 outA = inA; 1881 inA->factor = FACTOR_LU; 1882 a->row = row; 1883 a->col = col; 1884 1885 if (!a->solve_work) { 1886 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1887 PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar)); 1888 } 1889 1890 if (!a->diag) { 1891 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 1892 } 1893 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 1894 1895 /* 1896 Blocksize 4 has a special faster solver for ILU(0) factorization 1897 with natural ordering 1898 */ 1899 if (a->bs == 4) { 1900 inA->ops.solve = MatSolve_SeqBAIJ_4_NaturalOrdering; 1901 } 1902 1903 PetscFunctionReturn(0); 1904 } 1905 1906 #undef __FUNC__ 1907 #define __FUNC__ "MatScale_SeqBAIJ" 1908 int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 1909 { 1910 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1911 int one = 1, totalnz = a->bs2*a->nz; 1912 1913 PetscFunctionBegin; 1914 BLscal_( &totalnz, alpha, a->a, &one ); 1915 PLogFlops(totalnz); 1916 PetscFunctionReturn(0); 1917 } 1918 1919 #undef __FUNC__ 1920 #define __FUNC__ "MatGetValues_SeqBAIJ" 1921 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 1922 { 1923 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1924 int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 1925 int *ai = a->i, *ailen = a->ilen; 1926 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 1927 Scalar *ap, *aa = a->a, zero = 0.0; 1928 1929 PetscFunctionBegin; 1930 for ( k=0; k<m; k++ ) { /* loop over rows */ 1931 row = im[k]; brow = row/bs; 1932 if (row < 0) SETERRQ(1,0,"Negative row"); 1933 if (row >= a->m) SETERRQ(1,0,"Row too large"); 1934 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1935 nrow = ailen[brow]; 1936 for ( l=0; l<n; l++ ) { /* loop over columns */ 1937 if (in[l] < 0) SETERRQ(1,0,"Negative column"); 1938 if (in[l] >= a->n) SETERRQ(1,0,"Column too large"); 1939 col = in[l] ; 1940 bcol = col/bs; 1941 cidx = col%bs; 1942 ridx = row%bs; 1943 high = nrow; 1944 low = 0; /* assume unsorted */ 1945 while (high-low > 5) { 1946 t = (low+high)/2; 1947 if (rp[t] > bcol) high = t; 1948 else low = t; 1949 } 1950 for ( i=low; i<high; i++ ) { 1951 if (rp[i] > bcol) break; 1952 if (rp[i] == bcol) { 1953 *v++ = ap[bs2*i+bs*cidx+ridx]; 1954 goto finished; 1955 } 1956 } 1957 *v++ = zero; 1958 finished:; 1959 } 1960 } 1961 PetscFunctionReturn(0); 1962 } 1963 1964 #undef __FUNC__ 1965 #define __FUNC__ "MatGetBlockSize_SeqBAIJ" 1966 int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs) 1967 { 1968 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data; 1969 1970 PetscFunctionBegin; 1971 *bs = baij->bs; 1972 PetscFunctionReturn(0); 1973 } 1974 1975 /* idx should be of length atlease bs */ 1976 #undef __FUNC__ 1977 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Block" 1978 static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg) 1979 { 1980 int i,row; 1981 1982 PetscFunctionBegin; 1983 row = idx[0]; 1984 if (row%bs!=0) { *flg = PETSC_FALSE; PetscFunctionReturn(0); } 1985 1986 for ( i=1; i<bs; i++ ) { 1987 if (row+i != idx[i]) { *flg = PETSC_FALSE; PetscFunctionReturn(0); } 1988 } 1989 *flg = PETSC_TRUE; 1990 PetscFunctionReturn(0); 1991 } 1992 1993 #undef __FUNC__ 1994 #define __FUNC__ "MatZeroRows_SeqBAIJ" 1995 int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag) 1996 { 1997 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1998 IS is_local; 1999 int ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2; 2000 PetscTruth flg; 2001 Scalar zero = 0.0,*aa; 2002 2003 PetscFunctionBegin; 2004 /* Make a copy of the IS and sort it */ 2005 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 2006 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 2007 ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr); 2008 ierr = ISSort(is_local); CHKERRQ(ierr); 2009 ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr); 2010 2011 i = 0; 2012 while (i < is_n) { 2013 if (rows[i]<0 || rows[i]>m) SETERRQ(1,0,"row out of range"); 2014 flg = PETSC_FALSE; 2015 if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); } 2016 if (flg) { /* There exists a block of rows to be Zerowed */ 2017 baij->ilen[rows[i]/bs] = 0; 2018 i += bs; 2019 } else { /* Zero out only the requested row */ 2020 count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs; 2021 aa = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs); 2022 for ( j=0; j<count; j++ ) { 2023 aa[0] = zero; 2024 aa+=bs; 2025 } 2026 i++; 2027 } 2028 } 2029 if (diag) { 2030 for ( j=0; j<is_n; j++ ) { 2031 ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 2032 } 2033 } 2034 ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr); 2035 ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr); 2036 ierr = ISDestroy(is_local); CHKERRQ(ierr); 2037 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2038 2039 PetscFunctionReturn(0); 2040 } 2041 2042 #undef __FUNC__ 2043 #define __FUNC__ "MatPrintHelp_SeqBAIJ" 2044 int MatPrintHelp_SeqBAIJ(Mat A) 2045 { 2046 static int called = 0; 2047 MPI_Comm comm = A->comm; 2048 2049 PetscFunctionBegin; 2050 if (called) {PetscFunctionReturn(0);} else called = 1; 2051 PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n"); 2052 PetscPrintf(comm," -mat_block_size <block_size>\n"); 2053 PetscFunctionReturn(0); 2054 } 2055 2056 /* -------------------------------------------------------------------*/ 2057 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 2058 MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 2059 MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N, 2060 MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ, 2061 MatSolve_SeqBAIJ_N,0, 2062 0,0, 2063 MatLUFactor_SeqBAIJ,0, 2064 0, 2065 MatTranspose_SeqBAIJ, 2066 MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 2067 MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 2068 0,MatAssemblyEnd_SeqBAIJ, 2069 0, 2070 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ, 2071 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 2072 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 2073 MatILUFactorSymbolic_SeqBAIJ,0, 2074 0,0, 2075 MatConvertSameType_SeqBAIJ,0,0, 2076 MatILUFactor_SeqBAIJ,0,0, 2077 MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ, 2078 MatGetValues_SeqBAIJ,0, 2079 MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 2080 0,0,0,MatGetBlockSize_SeqBAIJ, 2081 MatGetRowIJ_SeqBAIJ, 2082 MatRestoreRowIJ_SeqBAIJ, 2083 0,0,0,0,0,0, 2084 MatSetValuesBlocked_SeqBAIJ}; 2085 2086 #undef __FUNC__ 2087 #define __FUNC__ "MatCreateSeqBAIJ" 2088 /*@C 2089 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2090 compressed row) format. For good matrix assembly performance the 2091 user should preallocate the matrix storage by setting the parameter nz 2092 (or the array nzz). By setting these parameters accurately, performance 2093 during matrix assembly can be increased by more than a factor of 50. 2094 2095 Input Parameters: 2096 . comm - MPI communicator, set to PETSC_COMM_SELF 2097 . bs - size of block 2098 . m - number of rows 2099 . n - number of columns 2100 . nz - number of block nonzeros per block row (same for all rows) 2101 . nzz - array containing the number of block nonzeros in the various block rows 2102 (possibly different for each block row) or PETSC_NULL 2103 2104 Output Parameter: 2105 . A - the matrix 2106 2107 Options Database Keys: 2108 $ -mat_no_unroll - uses code that does not unroll the loops in the 2109 $ block calculations (much slower) 2110 $ -mat_block_size - size of the blocks to use 2111 2112 Notes: 2113 The block AIJ format is fully compatible with standard Fortran 77 2114 storage. That is, the stored row and column indices can begin at 2115 either one (as in Fortran) or zero. See the users' manual for details. 2116 2117 Specify the preallocated storage with either nz or nnz (not both). 2118 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2119 allocation. For additional details, see the users manual chapter on 2120 matrices. 2121 2122 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2123 @*/ 2124 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 2125 { 2126 Mat B; 2127 Mat_SeqBAIJ *b; 2128 int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size; 2129 2130 PetscFunctionBegin; 2131 MPI_Comm_size(comm,&size); 2132 if (size > 1) SETERRQ(1,0,"Comm must be of size 1"); 2133 2134 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr); 2135 2136 if (mbs*bs!=m || nbs*bs!=n) { 2137 SETERRQ(1,0,"Number rows, cols must be divisible by blocksize"); 2138 } 2139 2140 *A = 0; 2141 PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATSEQBAIJ,comm,MatDestroy,MatView); 2142 PLogObjectCreate(B); 2143 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 2144 PetscMemzero(b,sizeof(Mat_SeqBAIJ)); 2145 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 2146 ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr); 2147 if (!flg) { 2148 switch (bs) { 2149 case 1: 2150 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2151 B->ops.solve = MatSolve_SeqBAIJ_1; 2152 B->ops.mult = MatMult_SeqBAIJ_1; 2153 B->ops.multadd = MatMultAdd_SeqBAIJ_1; 2154 break; 2155 case 2: 2156 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2157 B->ops.solve = MatSolve_SeqBAIJ_2; 2158 B->ops.mult = MatMult_SeqBAIJ_2; 2159 B->ops.multadd = MatMultAdd_SeqBAIJ_2; 2160 break; 2161 case 3: 2162 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2163 B->ops.solve = MatSolve_SeqBAIJ_3; 2164 B->ops.mult = MatMult_SeqBAIJ_3; 2165 B->ops.multadd = MatMultAdd_SeqBAIJ_3; 2166 break; 2167 case 4: 2168 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2169 B->ops.solve = MatSolve_SeqBAIJ_4; 2170 B->ops.mult = MatMult_SeqBAIJ_4; 2171 B->ops.multadd = MatMultAdd_SeqBAIJ_4; 2172 break; 2173 case 5: 2174 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2175 B->ops.solve = MatSolve_SeqBAIJ_5; 2176 B->ops.mult = MatMult_SeqBAIJ_5; 2177 B->ops.multadd = MatMultAdd_SeqBAIJ_5; 2178 break; 2179 case 7: 2180 B->ops.mult = MatMult_SeqBAIJ_7; 2181 B->ops.solve = MatSolve_SeqBAIJ_7; 2182 B->ops.multadd = MatMultAdd_SeqBAIJ_7; 2183 break; 2184 } 2185 } 2186 B->destroy = MatDestroy_SeqBAIJ; 2187 B->view = MatView_SeqBAIJ; 2188 B->factor = 0; 2189 B->lupivotthreshold = 1.0; 2190 B->mapping = 0; 2191 b->row = 0; 2192 b->col = 0; 2193 b->reallocs = 0; 2194 2195 b->m = m; B->m = m; B->M = m; 2196 b->n = n; B->n = n; B->N = n; 2197 b->mbs = mbs; 2198 b->nbs = nbs; 2199 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 2200 if (nnz == PETSC_NULL) { 2201 if (nz == PETSC_DEFAULT) nz = 5; 2202 else if (nz <= 0) nz = 1; 2203 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 2204 nz = nz*mbs; 2205 } else { 2206 nz = 0; 2207 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 2208 } 2209 2210 /* allocate the matrix space */ 2211 len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 2212 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 2213 PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 2214 b->j = (int *) (b->a + nz*bs2); 2215 PetscMemzero(b->j,nz*sizeof(int)); 2216 b->i = b->j + nz; 2217 b->singlemalloc = 1; 2218 2219 b->i[0] = 0; 2220 for (i=1; i<mbs+1; i++) { 2221 b->i[i] = b->i[i-1] + b->imax[i-1]; 2222 } 2223 2224 /* b->ilen will count nonzeros in each block row so far. */ 2225 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 2226 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2227 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 2228 2229 b->bs = bs; 2230 b->bs2 = bs2; 2231 b->mbs = mbs; 2232 b->nz = 0; 2233 b->maxnz = nz*bs2; 2234 b->sorted = 0; 2235 b->roworiented = 1; 2236 b->nonew = 0; 2237 b->diag = 0; 2238 b->solve_work = 0; 2239 b->mult_work = 0; 2240 b->spptr = 0; 2241 B->info.nz_unneeded = (double)b->maxnz; 2242 2243 *A = B; 2244 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 2245 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 2246 PetscFunctionReturn(0); 2247 } 2248 2249 #undef __FUNC__ 2250 #define __FUNC__ "MatConvertSameType_SeqBAIJ" 2251 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 2252 { 2253 Mat C; 2254 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 2255 int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 2256 2257 PetscFunctionBegin; 2258 if (a->i[mbs] != nz) SETERRQ(1,0,"Corrupt matrix"); 2259 2260 *B = 0; 2261 PetscHeaderCreate(C,_p_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm,MatDestroy,MatView); 2262 PLogObjectCreate(C); 2263 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 2264 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 2265 C->destroy = MatDestroy_SeqBAIJ; 2266 C->view = MatView_SeqBAIJ; 2267 C->factor = A->factor; 2268 c->row = 0; 2269 c->col = 0; 2270 C->assembled = PETSC_TRUE; 2271 2272 c->m = C->m = a->m; 2273 c->n = C->n = a->n; 2274 C->M = a->m; 2275 C->N = a->n; 2276 2277 c->bs = a->bs; 2278 c->bs2 = a->bs2; 2279 c->mbs = a->mbs; 2280 c->nbs = a->nbs; 2281 2282 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 2283 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 2284 for ( i=0; i<mbs; i++ ) { 2285 c->imax[i] = a->imax[i]; 2286 c->ilen[i] = a->ilen[i]; 2287 } 2288 2289 /* allocate the matrix space */ 2290 c->singlemalloc = 1; 2291 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 2292 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 2293 c->j = (int *) (c->a + nz*bs2); 2294 c->i = c->j + nz; 2295 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 2296 if (mbs > 0) { 2297 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 2298 if (cpvalues == COPY_VALUES) { 2299 PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 2300 } 2301 } 2302 2303 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2304 c->sorted = a->sorted; 2305 c->roworiented = a->roworiented; 2306 c->nonew = a->nonew; 2307 2308 if (a->diag) { 2309 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 2310 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 2311 for ( i=0; i<mbs; i++ ) { 2312 c->diag[i] = a->diag[i]; 2313 } 2314 } 2315 else c->diag = 0; 2316 c->nz = a->nz; 2317 c->maxnz = a->maxnz; 2318 c->solve_work = 0; 2319 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 2320 c->mult_work = 0; 2321 *B = C; 2322 PetscFunctionReturn(0); 2323 } 2324 2325 #undef __FUNC__ 2326 #define __FUNC__ "MatLoad_SeqBAIJ" 2327 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 2328 { 2329 Mat_SeqBAIJ *a; 2330 Mat B; 2331 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 2332 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2333 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 2334 int *masked, nmask,tmp,bs2,ishift; 2335 Scalar *aa; 2336 MPI_Comm comm = ((PetscObject) viewer)->comm; 2337 2338 PetscFunctionBegin; 2339 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2340 bs2 = bs*bs; 2341 2342 MPI_Comm_size(comm,&size); 2343 if (size > 1) SETERRQ(1,0,"view must have one processor"); 2344 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2345 ierr = PetscBinaryRead(fd,header,4,PETSC_INT); CHKERRQ(ierr); 2346 if (header[0] != MAT_COOKIE) SETERRQ(1,0,"not Mat object"); 2347 M = header[1]; N = header[2]; nz = header[3]; 2348 2349 if (M != N) SETERRQ(1,0,"Can only do square matrices"); 2350 2351 /* 2352 This code adds extra rows to make sure the number of rows is 2353 divisible by the blocksize 2354 */ 2355 mbs = M/bs; 2356 extra_rows = bs - M + bs*(mbs); 2357 if (extra_rows == bs) extra_rows = 0; 2358 else mbs++; 2359 if (extra_rows) { 2360 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 2361 } 2362 2363 /* read in row lengths */ 2364 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 2365 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 2366 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 2367 2368 /* read in column indices */ 2369 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 2370 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT); CHKERRQ(ierr); 2371 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 2372 2373 /* loop over row lengths determining block row lengths */ 2374 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 2375 PetscMemzero(browlengths,mbs*sizeof(int)); 2376 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 2377 PetscMemzero(mask,mbs*sizeof(int)); 2378 masked = mask + mbs; 2379 rowcount = 0; nzcount = 0; 2380 for ( i=0; i<mbs; i++ ) { 2381 nmask = 0; 2382 for ( j=0; j<bs; j++ ) { 2383 kmax = rowlengths[rowcount]; 2384 for ( k=0; k<kmax; k++ ) { 2385 tmp = jj[nzcount++]/bs; 2386 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2387 } 2388 rowcount++; 2389 } 2390 browlengths[i] += nmask; 2391 /* zero out the mask elements we set */ 2392 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2393 } 2394 2395 /* create our matrix */ 2396 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 2397 B = *A; 2398 a = (Mat_SeqBAIJ *) B->data; 2399 2400 /* set matrix "i" values */ 2401 a->i[0] = 0; 2402 for ( i=1; i<= mbs; i++ ) { 2403 a->i[i] = a->i[i-1] + browlengths[i-1]; 2404 a->ilen[i-1] = browlengths[i-1]; 2405 } 2406 a->nz = 0; 2407 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 2408 2409 /* read in nonzero values */ 2410 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 2411 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR); CHKERRQ(ierr); 2412 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 2413 2414 /* set "a" and "j" values into matrix */ 2415 nzcount = 0; jcount = 0; 2416 for ( i=0; i<mbs; i++ ) { 2417 nzcountb = nzcount; 2418 nmask = 0; 2419 for ( j=0; j<bs; j++ ) { 2420 kmax = rowlengths[i*bs+j]; 2421 for ( k=0; k<kmax; k++ ) { 2422 tmp = jj[nzcount++]/bs; 2423 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2424 } 2425 rowcount++; 2426 } 2427 /* sort the masked values */ 2428 PetscSortInt(nmask,masked); 2429 2430 /* set "j" values into matrix */ 2431 maskcount = 1; 2432 for ( j=0; j<nmask; j++ ) { 2433 a->j[jcount++] = masked[j]; 2434 mask[masked[j]] = maskcount++; 2435 } 2436 /* set "a" values into matrix */ 2437 ishift = bs2*a->i[i]; 2438 for ( j=0; j<bs; j++ ) { 2439 kmax = rowlengths[i*bs+j]; 2440 for ( k=0; k<kmax; k++ ) { 2441 tmp = jj[nzcountb]/bs ; 2442 block = mask[tmp] - 1; 2443 point = jj[nzcountb] - bs*tmp; 2444 idx = ishift + bs2*block + j + bs*point; 2445 a->a[idx] = aa[nzcountb++]; 2446 } 2447 } 2448 /* zero out the mask elements we set */ 2449 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2450 } 2451 if (jcount != a->nz) SETERRQ(1,0,"Error bad binary matrix"); 2452 2453 PetscFree(rowlengths); 2454 PetscFree(browlengths); 2455 PetscFree(aa); 2456 PetscFree(jj); 2457 PetscFree(mask); 2458 2459 B->assembled = PETSC_TRUE; 2460 2461 ierr = MatView_Private(B); CHKERRQ(ierr); 2462 PetscFunctionReturn(0); 2463 } 2464 2465 2466 2467