1 2 #ifdef PETSC_RCS_HEADER 3 static char vcid[] = "$Id: baij.c,v 1.118 1997/12/01 01:55:02 bsmith Exp bsmith $"; 4 #endif 5 6 /* 7 Defines the basic matrix operations for the BAIJ (compressed row) 8 matrix storage format. 9 */ 10 11 #include "pinclude/pviewer.h" 12 #include "sys.h" 13 #include "src/mat/impls/baij/seq/baij.h" 14 #include "src/vec/vecimpl.h" 15 #include "src/inline/spops.h" 16 #include "petsc.h" 17 18 19 /* 20 Adds diagonal pointers to sparse matrix structure. 21 */ 22 23 #undef __FUNC__ 24 #define __FUNC__ "MatMarkDiag_SeqBAIJ" 25 int MatMarkDiag_SeqBAIJ(Mat A) 26 { 27 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 28 int i,j, *diag, m = a->mbs; 29 30 PetscFunctionBegin; 31 diag = (int *) PetscMalloc( (m+1)*sizeof(int)); CHKPTRQ(diag); 32 PLogObjectMemory(A,(m+1)*sizeof(int)); 33 for ( i=0; i<m; i++ ) { 34 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 35 if (a->j[j] == i) { 36 diag[i] = j; 37 break; 38 } 39 } 40 } 41 a->diag = diag; 42 PetscFunctionReturn(0); 43 } 44 45 46 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 47 48 #undef __FUNC__ 49 #define __FUNC__ "MatGetRowIJ_SeqBAIJ" 50 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 51 PetscTruth *done) 52 { 53 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 54 int ierr, n = a->mbs,i; 55 56 PetscFunctionBegin; 57 *nn = n; 58 if (!ia) PetscFunctionReturn(0); 59 if (symmetric) { 60 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 61 } else if (oshift == 1) { 62 /* temporarily add 1 to i and j indices */ 63 int nz = a->i[n] + 1; 64 for ( i=0; i<nz; i++ ) a->j[i]++; 65 for ( i=0; i<n+1; i++ ) a->i[i]++; 66 *ia = a->i; *ja = a->j; 67 } else { 68 *ia = a->i; *ja = a->j; 69 } 70 71 PetscFunctionReturn(0); 72 } 73 74 #undef __FUNC__ 75 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ" 76 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 77 PetscTruth *done) 78 { 79 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 80 int i,n = a->mbs; 81 82 PetscFunctionBegin; 83 if (!ia) PetscFunctionReturn(0); 84 if (symmetric) { 85 PetscFree(*ia); 86 PetscFree(*ja); 87 } else if (oshift == 1) { 88 int nz = a->i[n]; 89 for ( i=0; i<nz; i++ ) a->j[i]--; 90 for ( i=0; i<n+1; i++ ) a->i[i]--; 91 } 92 PetscFunctionReturn(0); 93 } 94 95 96 #undef __FUNC__ 97 #define __FUNC__ "MatView_SeqBAIJ_Binary" 98 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 99 { 100 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 101 int i, fd, *col_lens, ierr, bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 102 Scalar *aa; 103 104 PetscFunctionBegin; 105 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 106 col_lens = (int *) PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 107 col_lens[0] = MAT_COOKIE; 108 109 col_lens[1] = a->m; 110 col_lens[2] = a->n; 111 col_lens[3] = a->nz*bs2; 112 113 /* store lengths of each row and write (including header) to file */ 114 count = 0; 115 for ( i=0; i<a->mbs; i++ ) { 116 for ( j=0; j<bs; j++ ) { 117 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 118 } 119 } 120 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,PETSC_INT,1); CHKERRQ(ierr); 121 PetscFree(col_lens); 122 123 /* store column indices (zero start index) */ 124 jj = (int *) PetscMalloc( (a->nz+1)*bs2*sizeof(int) ); CHKPTRQ(jj); 125 count = 0; 126 for ( i=0; i<a->mbs; i++ ) { 127 for ( j=0; j<bs; j++ ) { 128 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 129 for ( l=0; l<bs; l++ ) { 130 jj[count++] = bs*a->j[k] + l; 131 } 132 } 133 } 134 } 135 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0); CHKERRQ(ierr); 136 PetscFree(jj); 137 138 /* store nonzero values */ 139 aa = (Scalar *) PetscMalloc((a->nz+1)*bs2*sizeof(Scalar)); CHKPTRQ(aa); 140 count = 0; 141 for ( i=0; i<a->mbs; i++ ) { 142 for ( j=0; j<bs; j++ ) { 143 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 144 for ( l=0; l<bs; l++ ) { 145 aa[count++] = a->a[bs2*k + l*bs + j]; 146 } 147 } 148 } 149 } 150 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0); CHKERRQ(ierr); 151 PetscFree(aa); 152 PetscFunctionReturn(0); 153 } 154 155 #undef __FUNC__ 156 #define __FUNC__ "MatView_SeqBAIJ_ASCII" 157 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 158 { 159 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 160 int ierr, i,j,format,bs = a->bs,k,l,bs2=a->bs2; 161 FILE *fd; 162 char *outputname; 163 164 PetscFunctionBegin; 165 ierr = ViewerASCIIGetPointer(viewer,&fd); CHKERRQ(ierr); 166 ierr = ViewerFileGetOutputname_Private(viewer,&outputname);CHKERRQ(ierr); 167 ierr = ViewerGetFormat(viewer,&format); 168 if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) { 169 fprintf(fd," block size is %d\n",bs); 170 } 171 else if (format == VIEWER_FORMAT_ASCII_MATLAB) { 172 SETERRQ(PETSC_ERR_SUP,0,"Matlab format not supported"); 173 } 174 else if (format == VIEWER_FORMAT_ASCII_COMMON) { 175 for ( i=0; i<a->mbs; i++ ) { 176 for ( j=0; j<bs; j++ ) { 177 fprintf(fd,"row %d:",i*bs+j); 178 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 179 for ( l=0; l<bs; l++ ) { 180 #if defined(USE_PETSC_COMPLEX) 181 if (imag(a->a[bs2*k + l*bs + j]) > 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 182 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 183 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 184 else if (imag(a->a[bs2*k + l*bs + j]) < 0.0 && real(a->a[bs2*k + l*bs + j]) != 0.0) 185 fprintf(fd," %d %g - %g i",bs*a->j[k]+l, 186 real(a->a[bs2*k + l*bs + j]),-imag(a->a[bs2*k + l*bs + j])); 187 else if (real(a->a[bs2*k + l*bs + j]) != 0.0) 188 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 189 #else 190 if (a->a[bs2*k + l*bs + j] != 0.0) 191 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 192 #endif 193 } 194 } 195 fprintf(fd,"\n"); 196 } 197 } 198 } 199 else { 200 for ( i=0; i<a->mbs; i++ ) { 201 for ( j=0; j<bs; j++ ) { 202 fprintf(fd,"row %d:",i*bs+j); 203 for ( k=a->i[i]; k<a->i[i+1]; k++ ) { 204 for ( l=0; l<bs; l++ ) { 205 #if defined(USE_PETSC_COMPLEX) 206 if (imag(a->a[bs2*k + l*bs + j]) > 0.0) { 207 fprintf(fd," %d %g + %g i",bs*a->j[k]+l, 208 real(a->a[bs2*k + l*bs + j]),imag(a->a[bs2*k + l*bs + j])); 209 } 210 else if (imag(a->a[bs2*k + l*bs + j]) < 0.0) { 211 fprintf(fd," %d %g - %g i",bs*a->j[k]+l, 212 real(a->a[bs2*k + l*bs + j]),-imag(a->a[bs2*k + l*bs + j])); 213 } 214 else { 215 fprintf(fd," %d %g ",bs*a->j[k]+l,real(a->a[bs2*k + l*bs + j])); 216 } 217 #else 218 fprintf(fd," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]); 219 #endif 220 } 221 } 222 fprintf(fd,"\n"); 223 } 224 } 225 } 226 fflush(fd); 227 PetscFunctionReturn(0); 228 } 229 230 #undef __FUNC__ 231 #define __FUNC__ "MatView_SeqBAIJ_Draw" 232 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer) 233 { 234 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *) A->data; 235 int row,ierr,i,j,k,l,mbs=a->mbs,pause,color,bs=a->bs,bs2=a->bs2; 236 double xl,yl,xr,yr,w,h,xc,yc,scale = 1.0,x_l,x_r,y_l,y_r; 237 Scalar *aa; 238 Draw draw; 239 DrawButton button; 240 PetscTruth isnull; 241 242 PetscFunctionBegin; 243 ierr = ViewerDrawGetDraw(viewer,&draw);CHKERRQ(ierr); 244 ierr = DrawIsNull(draw,&isnull); CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 245 246 xr = a->n; yr = a->m; h = yr/10.0; w = xr/10.0; 247 xr += w; yr += h; xl = -w; yl = -h; 248 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 249 /* loop over matrix elements drawing boxes */ 250 color = DRAW_BLUE; 251 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 252 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 253 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 254 x_l = a->j[j]*bs; x_r = x_l + 1.0; 255 aa = a->a + j*bs2; 256 for ( k=0; k<bs; k++ ) { 257 for ( l=0; l<bs; l++ ) { 258 if (PetscReal(*aa++) >= 0.) continue; 259 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 260 } 261 } 262 } 263 } 264 color = DRAW_CYAN; 265 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 266 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 267 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 268 x_l = a->j[j]*bs; x_r = x_l + 1.0; 269 aa = a->a + j*bs2; 270 for ( k=0; k<bs; k++ ) { 271 for ( l=0; l<bs; l++ ) { 272 if (PetscReal(*aa++) != 0.) continue; 273 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 274 } 275 } 276 } 277 } 278 279 color = DRAW_RED; 280 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 281 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 282 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 283 x_l = a->j[j]*bs; x_r = x_l + 1.0; 284 aa = a->a + j*bs2; 285 for ( k=0; k<bs; k++ ) { 286 for ( l=0; l<bs; l++ ) { 287 if (PetscReal(*aa++) <= 0.) continue; 288 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 289 } 290 } 291 } 292 } 293 294 DrawSynchronizedFlush(draw); 295 DrawGetPause(draw,&pause); 296 if (pause >= 0) { PetscSleep(pause); PetscFunctionReturn(0);} 297 298 /* allow the matrix to zoom or shrink */ 299 ierr = DrawSynchronizedGetMouseButton(draw,&button,&xc,&yc,0,0); 300 while (button != BUTTON_RIGHT) { 301 DrawSynchronizedClear(draw); 302 if (button == BUTTON_LEFT) scale = .5; 303 else if (button == BUTTON_CENTER) scale = 2.; 304 xl = scale*(xl + w - xc) + xc - w*scale; 305 xr = scale*(xr - w - xc) + xc + w*scale; 306 yl = scale*(yl + h - yc) + yc - h*scale; 307 yr = scale*(yr - h - yc) + yc + h*scale; 308 w *= scale; h *= scale; 309 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr); CHKERRQ(ierr); 310 color = DRAW_BLUE; 311 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 312 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 313 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 314 x_l = a->j[j]*bs; x_r = x_l + 1.0; 315 aa = a->a + j*bs2; 316 for ( k=0; k<bs; k++ ) { 317 for ( l=0; l<bs; l++ ) { 318 if (PetscReal(*aa++) >= 0.) continue; 319 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 320 } 321 } 322 } 323 } 324 color = DRAW_CYAN; 325 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 326 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 327 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 328 x_l = a->j[j]*bs; x_r = x_l + 1.0; 329 aa = a->a + j*bs2; 330 for ( k=0; k<bs; k++ ) { 331 for ( l=0; l<bs; l++ ) { 332 if (PetscReal(*aa++) != 0.) continue; 333 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 334 } 335 } 336 } 337 } 338 339 color = DRAW_RED; 340 for ( i=0,row=0; i<mbs; i++,row+=bs ) { 341 for ( j=a->i[i]; j<a->i[i+1]; j++ ) { 342 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 343 x_l = a->j[j]*bs; x_r = x_l + 1.0; 344 aa = a->a + j*bs2; 345 for ( k=0; k<bs; k++ ) { 346 for ( l=0; l<bs; l++ ) { 347 if (PetscReal(*aa++) <= 0.) continue; 348 DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color); 349 } 350 } 351 } 352 } 353 ierr = DrawSynchronizedGetMouseButton(draw,&button,&xc,&yc,0,0); 354 } 355 PetscFunctionReturn(0); 356 } 357 358 #undef __FUNC__ 359 #define __FUNC__ "MatView_SeqBAIJ" 360 int MatView_SeqBAIJ(PetscObject obj,Viewer viewer) 361 { 362 Mat A = (Mat) obj; 363 ViewerType vtype; 364 int ierr; 365 366 PetscFunctionBegin; 367 ierr = ViewerGetType(viewer,&vtype); CHKERRQ(ierr); 368 if (vtype == MATLAB_VIEWER) { 369 SETERRQ(PETSC_ERR_SUP,0,"Matlab viewer not supported"); 370 } else if (vtype == ASCII_FILE_VIEWER || vtype == ASCII_FILES_VIEWER){ 371 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 372 } else if (vtype == BINARY_FILE_VIEWER) { 373 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 374 } else if (vtype == DRAW_VIEWER) { 375 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 376 } 377 PetscFunctionReturn(0); 378 } 379 380 #define CHUNKSIZE 10 381 382 #undef __FUNC__ 383 #define __FUNC__ "MatSetValues_SeqBAIJ" 384 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 385 { 386 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 387 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 388 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 389 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 390 int ridx,cidx,bs2=a->bs2; 391 Scalar *ap,value,*aa=a->a,*bap; 392 393 PetscFunctionBegin; 394 for ( k=0; k<m; k++ ) { /* loop over added rows */ 395 row = im[k]; brow = row/bs; 396 #if defined(USE_PETSC_BOPT_g) 397 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 398 if (row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 399 #endif 400 rp = aj + ai[brow]; 401 ap = aa + bs2*ai[brow]; 402 rmax = imax[brow]; 403 nrow = ailen[brow]; 404 low = 0; 405 for ( l=0; l<n; l++ ) { /* loop over added columns */ 406 #if defined(USE_PETSC_BOPT_g) 407 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 408 if (in[l] >= a->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 409 #endif 410 col = in[l]; bcol = col/bs; 411 ridx = row % bs; cidx = col % bs; 412 if (roworiented) { 413 value = *v++; 414 } else { 415 value = v[k + l*m]; 416 } 417 if (!sorted) low = 0; high = nrow; 418 while (high-low > 7) { 419 t = (low+high)/2; 420 if (rp[t] > bcol) high = t; 421 else low = t; 422 } 423 for ( i=low; i<high; i++ ) { 424 if (rp[i] > bcol) break; 425 if (rp[i] == bcol) { 426 bap = ap + bs2*i + bs*cidx + ridx; 427 if (is == ADD_VALUES) *bap += value; 428 else *bap = value; 429 goto noinsert1; 430 } 431 } 432 if (nonew == 1) goto noinsert1; 433 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 434 if (nrow >= rmax) { 435 /* there is no extra room in row, therefore enlarge */ 436 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 437 Scalar *new_a; 438 439 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 440 441 /* Malloc new storage space */ 442 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 443 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 444 new_j = (int *) (new_a + bs2*new_nz); 445 new_i = new_j + new_nz; 446 447 /* copy over old data into new slots */ 448 for ( ii=0; ii<brow+1; ii++ ) {new_i[ii] = ai[ii];} 449 for ( ii=brow+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 450 PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int)); 451 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 452 PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow, 453 len*sizeof(int)); 454 PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(Scalar)); 455 PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 456 PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), 457 aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(Scalar)); 458 /* free up old matrix storage */ 459 PetscFree(a->a); 460 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 461 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 462 a->singlemalloc = 1; 463 464 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 465 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 466 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 467 a->maxnz += bs2*CHUNKSIZE; 468 a->reallocs++; 469 a->nz++; 470 } 471 N = nrow++ - 1; 472 /* shift up all the later entries in this row */ 473 for ( ii=N; ii>=i; ii-- ) { 474 rp[ii+1] = rp[ii]; 475 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 476 } 477 if (N>=i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 478 rp[i] = bcol; 479 ap[bs2*i + bs*cidx + ridx] = value; 480 noinsert1:; 481 low = i; 482 } 483 ailen[brow] = nrow; 484 } 485 PetscFunctionReturn(0); 486 } 487 488 #undef __FUNC__ 489 #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ" 490 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 491 { 492 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 493 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 494 int *imax=a->imax,*ai=a->i,*ailen=a->ilen, roworiented=a->roworiented; 495 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval; 496 Scalar *ap,*value=v,*aa=a->a,*bap; 497 498 PetscFunctionBegin; 499 if (roworiented) { 500 stepval = (n-1)*bs; 501 } else { 502 stepval = (m-1)*bs; 503 } 504 for ( k=0; k<m; k++ ) { /* loop over added rows */ 505 row = im[k]; 506 #if defined(USE_PETSC_BOPT_g) 507 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 508 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 509 #endif 510 rp = aj + ai[row]; 511 ap = aa + bs2*ai[row]; 512 rmax = imax[row]; 513 nrow = ailen[row]; 514 low = 0; 515 for ( l=0; l<n; l++ ) { /* loop over added columns */ 516 #if defined(USE_PETSC_BOPT_g) 517 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 518 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 519 #endif 520 col = in[l]; 521 if (roworiented) { 522 value = v + k*(stepval+bs)*bs + l*bs; 523 } else { 524 value = v + l*(stepval+bs)*bs + k*bs; 525 } 526 if (!sorted) low = 0; high = nrow; 527 while (high-low > 7) { 528 t = (low+high)/2; 529 if (rp[t] > col) high = t; 530 else low = t; 531 } 532 for ( i=low; i<high; i++ ) { 533 if (rp[i] > col) break; 534 if (rp[i] == col) { 535 bap = ap + bs2*i; 536 if (roworiented) { 537 if (is == ADD_VALUES) { 538 for ( ii=0; ii<bs; ii++,value+=stepval ) { 539 for (jj=ii; jj<bs2; jj+=bs ) { 540 bap[jj] += *value++; 541 } 542 } 543 } else { 544 for ( ii=0; ii<bs; ii++,value+=stepval ) { 545 for (jj=ii; jj<bs2; jj+=bs ) { 546 bap[jj] = *value++; 547 } 548 } 549 } 550 } else { 551 if (is == ADD_VALUES) { 552 for ( ii=0; ii<bs; ii++,value+=stepval ) { 553 for (jj=0; jj<bs; jj++ ) { 554 *bap++ += *value++; 555 } 556 } 557 } else { 558 for ( ii=0; ii<bs; ii++,value+=stepval ) { 559 for (jj=0; jj<bs; jj++ ) { 560 *bap++ = *value++; 561 } 562 } 563 } 564 } 565 goto noinsert2; 566 } 567 } 568 if (nonew == 1) goto noinsert2; 569 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 570 if (nrow >= rmax) { 571 /* there is no extra room in row, therefore enlarge */ 572 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 573 Scalar *new_a; 574 575 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 576 577 /* malloc new storage space */ 578 len = new_nz*(sizeof(int)+bs2*sizeof(Scalar))+(a->mbs+1)*sizeof(int); 579 new_a = (Scalar *) PetscMalloc( len ); CHKPTRQ(new_a); 580 new_j = (int *) (new_a + bs2*new_nz); 581 new_i = new_j + new_nz; 582 583 /* copy over old data into new slots */ 584 for ( ii=0; ii<row+1; ii++ ) {new_i[ii] = ai[ii];} 585 for ( ii=row+1; ii<a->mbs+1; ii++ ) {new_i[ii] = ai[ii]+CHUNKSIZE;} 586 PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int)); 587 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 588 PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int)); 589 PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(Scalar)); 590 PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(Scalar)); 591 PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(Scalar)); 592 /* free up old matrix storage */ 593 PetscFree(a->a); 594 if (!a->singlemalloc) {PetscFree(a->i);PetscFree(a->j);} 595 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 596 a->singlemalloc = 1; 597 598 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 599 rmax = imax[row] = imax[row] + CHUNKSIZE; 600 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(Scalar))); 601 a->maxnz += bs2*CHUNKSIZE; 602 a->reallocs++; 603 a->nz++; 604 } 605 N = nrow++ - 1; 606 /* shift up all the later entries in this row */ 607 for ( ii=N; ii>=i; ii-- ) { 608 rp[ii+1] = rp[ii]; 609 PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(Scalar)); 610 } 611 if (N >= i) PetscMemzero(ap+bs2*i,bs2*sizeof(Scalar)); 612 rp[i] = col; 613 bap = ap + bs2*i; 614 if (roworiented) { 615 for ( ii=0; ii<bs; ii++,value+=stepval) { 616 for (jj=ii; jj<bs2; jj+=bs ) { 617 bap[jj] = *value++; 618 } 619 } 620 } else { 621 for ( ii=0; ii<bs; ii++,value+=stepval ) { 622 for (jj=0; jj<bs; jj++ ) { 623 *bap++ = *value++; 624 } 625 } 626 } 627 noinsert2:; 628 low = i; 629 } 630 ailen[row] = nrow; 631 } 632 PetscFunctionReturn(0); 633 } 634 635 #undef __FUNC__ 636 #define __FUNC__ "MatGetSize_SeqBAIJ" 637 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 638 { 639 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 640 641 PetscFunctionBegin; 642 if (m) *m = a->m; 643 if (n) *n = a->n; 644 PetscFunctionReturn(0); 645 } 646 647 #undef __FUNC__ 648 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ" 649 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 650 { 651 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 652 653 PetscFunctionBegin; 654 *m = 0; *n = a->m; 655 PetscFunctionReturn(0); 656 } 657 658 #undef __FUNC__ 659 #define __FUNC__ "MatGetRow_SeqBAIJ" 660 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 661 { 662 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 663 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 664 Scalar *aa,*v_i,*aa_i; 665 666 PetscFunctionBegin; 667 bs = a->bs; 668 ai = a->i; 669 aj = a->j; 670 aa = a->a; 671 bs2 = a->bs2; 672 673 if (row < 0 || row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row out of range"); 674 675 bn = row/bs; /* Block number */ 676 bp = row % bs; /* Block Position */ 677 M = ai[bn+1] - ai[bn]; 678 *nz = bs*M; 679 680 if (v) { 681 *v = 0; 682 if (*nz) { 683 *v = (Scalar *) PetscMalloc( (*nz)*sizeof(Scalar) ); CHKPTRQ(*v); 684 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 685 v_i = *v + i*bs; 686 aa_i = aa + bs2*(ai[bn] + i); 687 for ( j=bp,k=0; j<bs2; j+=bs,k++ ) {v_i[k] = aa_i[j];} 688 } 689 } 690 } 691 692 if (idx) { 693 *idx = 0; 694 if (*nz) { 695 *idx = (int *) PetscMalloc( (*nz)*sizeof(int) ); CHKPTRQ(*idx); 696 for ( i=0; i<M; i++ ) { /* for each block in the block row */ 697 idx_i = *idx + i*bs; 698 itmp = bs*aj[ai[bn] + i]; 699 for ( j=0; j<bs; j++ ) {idx_i[j] = itmp++;} 700 } 701 } 702 } 703 PetscFunctionReturn(0); 704 } 705 706 #undef __FUNC__ 707 #define __FUNC__ "MatRestoreRow_SeqBAIJ" 708 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 709 { 710 PetscFunctionBegin; 711 if (idx) {if (*idx) PetscFree(*idx);} 712 if (v) {if (*v) PetscFree(*v);} 713 PetscFunctionReturn(0); 714 } 715 716 #undef __FUNC__ 717 #define __FUNC__ "MatTranspose_SeqBAIJ" 718 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 719 { 720 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 721 Mat C; 722 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 723 int *rows,*cols,bs2=a->bs2; 724 Scalar *array=a->a; 725 726 PetscFunctionBegin; 727 if (B==PETSC_NULL && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Square matrix only for in-place"); 728 col = (int *) PetscMalloc((1+nbs)*sizeof(int)); CHKPTRQ(col); 729 PetscMemzero(col,(1+nbs)*sizeof(int)); 730 731 for ( i=0; i<ai[mbs]; i++ ) col[aj[i]] += 1; 732 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C); CHKERRQ(ierr); 733 PetscFree(col); 734 rows = (int *) PetscMalloc(2*bs*sizeof(int)); CHKPTRQ(rows); 735 cols = rows + bs; 736 for ( i=0; i<mbs; i++ ) { 737 cols[0] = i*bs; 738 for (k=1; k<bs; k++ ) cols[k] = cols[k-1] + 1; 739 len = ai[i+1] - ai[i]; 740 for ( j=0; j<len; j++ ) { 741 rows[0] = (*aj++)*bs; 742 for (k=1; k<bs; k++ ) rows[k] = rows[k-1] + 1; 743 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES); CHKERRQ(ierr); 744 array += bs2; 745 } 746 } 747 PetscFree(rows); 748 749 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 750 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 751 752 if (B != PETSC_NULL) { 753 *B = C; 754 } else { 755 /* This isn't really an in-place transpose */ 756 PetscFree(a->a); 757 if (!a->singlemalloc) {PetscFree(a->i); PetscFree(a->j);} 758 if (a->diag) PetscFree(a->diag); 759 if (a->ilen) PetscFree(a->ilen); 760 if (a->imax) PetscFree(a->imax); 761 if (a->solve_work) PetscFree(a->solve_work); 762 PetscFree(a); 763 PetscMemcpy(A,C,sizeof(struct _p_Mat)); 764 PetscHeaderDestroy(C); 765 } 766 PetscFunctionReturn(0); 767 } 768 769 770 #undef __FUNC__ 771 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ" 772 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 773 { 774 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 775 int fshift = 0,i,j,*ai = a->i, *aj = a->j, *imax = a->imax; 776 int m = a->m,*ip, N, *ailen = a->ilen; 777 int mbs = a->mbs, bs2 = a->bs2,rmax = 0; 778 Scalar *aa = a->a, *ap; 779 780 PetscFunctionBegin; 781 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 782 783 if (m) rmax = ailen[0]; 784 for ( i=1; i<mbs; i++ ) { 785 /* move each row back by the amount of empty slots (fshift) before it*/ 786 fshift += imax[i-1] - ailen[i-1]; 787 rmax = PetscMax(rmax,ailen[i]); 788 if (fshift) { 789 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 790 N = ailen[i]; 791 for ( j=0; j<N; j++ ) { 792 ip[j-fshift] = ip[j]; 793 PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(Scalar)); 794 } 795 } 796 ai[i] = ai[i-1] + ailen[i-1]; 797 } 798 if (mbs) { 799 fshift += imax[mbs-1] - ailen[mbs-1]; 800 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 801 } 802 /* reset ilen and imax for each row */ 803 for ( i=0; i<mbs; i++ ) { 804 ailen[i] = imax[i] = ai[i+1] - ai[i]; 805 } 806 a->nz = ai[mbs]; 807 808 /* diagonals may have moved, so kill the diagonal pointers */ 809 if (fshift && a->diag) { 810 PetscFree(a->diag); 811 PLogObjectMemory(A,-(m+1)*sizeof(int)); 812 a->diag = 0; 813 } 814 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 815 m,a->n,a->bs,fshift*bs2,a->nz*bs2); 816 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 817 a->reallocs); 818 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 819 a->reallocs = 0; 820 A->info.nz_unneeded = (double)fshift*bs2; 821 822 PetscFunctionReturn(0); 823 } 824 825 #undef __FUNC__ 826 #define __FUNC__ "MatZeroEntries_SeqBAIJ" 827 int MatZeroEntries_SeqBAIJ(Mat A) 828 { 829 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 830 831 PetscFunctionBegin; 832 PetscMemzero(a->a,a->bs2*a->i[a->mbs]*sizeof(Scalar)); 833 PetscFunctionReturn(0); 834 } 835 836 #undef __FUNC__ 837 #define __FUNC__ "MatDestroy_SeqBAIJ" 838 int MatDestroy_SeqBAIJ(PetscObject obj) 839 { 840 Mat A = (Mat) obj; 841 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 842 843 #if defined(USE_PETSC_LOG) 844 PLogObjectState(obj,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 845 #endif 846 PetscFree(a->a); 847 if (!a->singlemalloc) { PetscFree(a->i); PetscFree(a->j);} 848 if (a->diag) PetscFree(a->diag); 849 if (a->ilen) PetscFree(a->ilen); 850 if (a->imax) PetscFree(a->imax); 851 if (a->solve_work) PetscFree(a->solve_work); 852 if (a->mult_work) PetscFree(a->mult_work); 853 PetscFree(a); 854 PLogObjectDestroy(A); 855 PetscHeaderDestroy(A); 856 PetscFunctionReturn(0); 857 } 858 859 #undef __FUNC__ 860 #define __FUNC__ "MatSetOption_SeqBAIJ" 861 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 862 { 863 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 864 865 PetscFunctionBegin; 866 if (op == MAT_ROW_ORIENTED) a->roworiented = 1; 867 else if (op == MAT_COLUMN_ORIENTED) a->roworiented = 0; 868 else if (op == MAT_COLUMNS_SORTED) a->sorted = 1; 869 else if (op == MAT_COLUMNS_UNSORTED) a->sorted = 0; 870 else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 871 else if (op == MAT_NEW_NONZERO_LOCATION_ERROR) a->nonew = -1; 872 else if (op == MAT_NEW_NONZERO_ALLOCATION_ERROR) a->nonew = -2; 873 else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 874 else if (op == MAT_ROWS_SORTED || 875 op == MAT_ROWS_UNSORTED || 876 op == MAT_SYMMETRIC || 877 op == MAT_STRUCTURALLY_SYMMETRIC || 878 op == MAT_YES_NEW_DIAGONALS || 879 op == MAT_IGNORE_OFF_PROC_ENTRIES) { 880 PLogInfo(A,"Info:MatSetOption_SeqBAIJ:Option ignored\n"); 881 } else if (op == MAT_NO_NEW_DIAGONALS) { 882 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 883 } else { 884 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 885 } 886 PetscFunctionReturn(0); 887 } 888 889 890 /* -------------------------------------------------------*/ 891 /* Should check that shapes of vectors and matrices match */ 892 /* -------------------------------------------------------*/ 893 #include "pinclude/plapack.h" 894 895 #undef __FUNC__ 896 #define __FUNC__ "MatMult_SeqBAIJ_1" 897 static int MatMult_SeqBAIJ_1(Mat A,Vec xx,Vec zz) 898 { 899 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 900 register Scalar *x,*z,*v,sum; 901 int mbs=a->mbs,i,*idx,*ii,n; 902 903 PetscFunctionBegin; 904 VecGetArray_Fast(xx,x); 905 VecGetArray_Fast(zz,z); 906 907 idx = a->j; 908 v = a->a; 909 ii = a->i; 910 911 for ( i=0; i<mbs; i++ ) { 912 n = ii[1] - ii[0]; ii++; 913 sum = 0.0; 914 while (n--) sum += *v++ * x[*idx++]; 915 z[i] = sum; 916 } 917 VecRestoreArray_Fast(xx,x); 918 VecRestoreArray_Fast(zz,z); 919 PLogFlops(2*a->nz - a->m); 920 PetscFunctionReturn(0); 921 } 922 923 #undef __FUNC__ 924 #define __FUNC__ "MatMult_SeqBAIJ_2" 925 static int MatMult_SeqBAIJ_2(Mat A,Vec xx,Vec zz) 926 { 927 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 928 register Scalar *x,*z,*v,*xb,sum1,sum2; 929 register Scalar x1,x2; 930 int mbs=a->mbs,i,*idx,*ii,j,n; 931 932 PetscFunctionBegin; 933 VecGetArray_Fast(xx,x); 934 VecGetArray_Fast(zz,z); 935 936 idx = a->j; 937 v = a->a; 938 ii = a->i; 939 940 for ( i=0; i<mbs; i++ ) { 941 n = ii[1] - ii[0]; ii++; 942 sum1 = 0.0; sum2 = 0.0; 943 for ( j=0; j<n; j++ ) { 944 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 945 sum1 += v[0]*x1 + v[2]*x2; 946 sum2 += v[1]*x1 + v[3]*x2; 947 v += 4; 948 } 949 z[0] = sum1; z[1] = sum2; 950 z += 2; 951 } 952 VecRestoreArray_Fast(xx,x); 953 VecRestoreArray_Fast(zz,z); 954 PLogFlops(4*a->nz - a->m); 955 PetscFunctionReturn(0); 956 } 957 958 #undef __FUNC__ 959 #define __FUNC__ "MatMult_SeqBAIJ_3" 960 static int MatMult_SeqBAIJ_3(Mat A,Vec xx,Vec zz) 961 { 962 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 963 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 964 int mbs=a->mbs,i,*idx,*ii,j,n; 965 966 PetscFunctionBegin; 967 VecGetArray_Fast(xx,x); 968 VecGetArray_Fast(zz,z); 969 970 idx = a->j; 971 v = a->a; 972 ii = a->i; 973 974 for ( i=0; i<mbs; i++ ) { 975 n = ii[1] - ii[0]; ii++; 976 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; 977 for ( j=0; j<n; j++ ) { 978 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 979 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 980 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 981 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 982 v += 9; 983 } 984 z[0] = sum1; z[1] = sum2; z[2] = sum3; 985 z += 3; 986 } 987 VecRestoreArray_Fast(xx,x); 988 VecRestoreArray_Fast(zz,z); 989 PLogFlops(18*a->nz - a->m); 990 PetscFunctionReturn(0); 991 } 992 993 #undef __FUNC__ 994 #define __FUNC__ "MatMult_SeqBAIJ_4" 995 static int MatMult_SeqBAIJ_4(Mat A,Vec xx,Vec zz) 996 { 997 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 998 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4; 999 int mbs=a->mbs,i,*idx,*ii,j,n; 1000 1001 PetscFunctionBegin; 1002 VecGetArray_Fast(xx,x); 1003 VecGetArray_Fast(zz,z); 1004 1005 idx = a->j; 1006 v = a->a; 1007 ii = a->i; 1008 1009 for ( i=0; i<mbs; i++ ) { 1010 n = ii[1] - ii[0]; ii++; 1011 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; 1012 for ( j=0; j<n; j++ ) { 1013 xb = x + 4*(*idx++); 1014 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1015 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 1016 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 1017 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 1018 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 1019 v += 16; 1020 } 1021 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 1022 z += 4; 1023 } 1024 VecRestoreArray_Fast(xx,x); 1025 VecRestoreArray_Fast(zz,z); 1026 PLogFlops(32*a->nz - a->m); 1027 PetscFunctionReturn(0); 1028 } 1029 1030 #undef __FUNC__ 1031 #define __FUNC__ "MatMult_SeqBAIJ_5" 1032 static int MatMult_SeqBAIJ_5(Mat A,Vec xx,Vec zz) 1033 { 1034 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1035 register Scalar sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5; 1036 register Scalar * restrict v,* restrict xb,* restrict z, * restrict x; 1037 int mbs=a->mbs,i,*idx,*ii,j,n; 1038 1039 VecGetArray_Fast(xx,x); 1040 VecGetArray_Fast(zz,z); 1041 1042 idx = a->j; 1043 v = a->a; 1044 ii = a->i; 1045 1046 for ( i=0; i<mbs; i++ ) { 1047 n = ii[1] - ii[0]; ii++; 1048 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; 1049 for ( j=0; j<n; j++ ) { 1050 xb = x + 5*(*idx++); 1051 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 1052 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1053 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1054 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1055 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1056 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 1057 v += 25; 1058 } 1059 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 1060 z += 5; 1061 } 1062 VecRestoreArray_Fast(xx,x); 1063 VecRestoreArray_Fast(zz,z); 1064 PLogFlops(50*a->nz - a->m); 1065 PetscFunctionReturn(0); 1066 } 1067 1068 #undef __FUNC__ 1069 #define __FUNC__ "MatMult_SeqBAIJ_7" 1070 static int MatMult_SeqBAIJ_7(Mat A,Vec xx,Vec zz) 1071 { 1072 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1073 register Scalar *x,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 1074 register Scalar x1,x2,x3,x4,x5,x6,x7; 1075 int mbs=a->mbs,i,*idx,*ii,j,n; 1076 1077 VecGetArray_Fast(xx,x); 1078 VecGetArray_Fast(zz,z); 1079 1080 idx = a->j; 1081 v = a->a; 1082 ii = a->i; 1083 1084 for ( i=0; i<mbs; i++ ) { 1085 n = ii[1] - ii[0]; ii++; 1086 sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0; sum5 = 0.0; sum6 = 0.0; sum7 = 0.0; 1087 for ( j=0; j<n; j++ ) { 1088 xb = x + 7*(*idx++); 1089 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 1090 sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 1091 sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 1092 sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 1093 sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 1094 sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 1095 sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 1096 sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 1097 v += 49; 1098 } 1099 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 1100 z += 7; 1101 } 1102 1103 VecRestoreArray_Fast(xx,x); 1104 VecRestoreArray_Fast(zz,z); 1105 PLogFlops(98*a->nz - a->m); 1106 PetscFunctionReturn(0); 1107 } 1108 1109 #undef __FUNC__ 1110 #define __FUNC__ "MatMult_SeqBAIJ_N" 1111 static int MatMult_SeqBAIJ_N(Mat A,Vec xx,Vec zz) 1112 { 1113 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1114 register Scalar *x,*z,*v,*xb; 1115 Scalar _DOne = 1.0,*work,*workt,_DZero = 0.0; 1116 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2; 1117 int _One = 1,ncols,k; 1118 1119 PetscFunctionBegin; 1120 VecGetArray_Fast(xx,x); 1121 VecGetArray_Fast(zz,z); 1122 1123 idx = a->j; 1124 v = a->a; 1125 ii = a->i; 1126 1127 1128 if (!a->mult_work) { 1129 k = PetscMax(a->m,a->n); 1130 a->mult_work = (Scalar *) PetscMalloc((k+1)*sizeof(Scalar));CHKPTRQ(a->mult_work); 1131 } 1132 work = a->mult_work; 1133 for ( i=0; i<mbs; i++ ) { 1134 n = ii[1] - ii[0]; ii++; 1135 ncols = n*bs; 1136 workt = work; 1137 for ( j=0; j<n; j++ ) { 1138 xb = x + bs*(*idx++); 1139 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1140 workt += bs; 1141 } 1142 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DZero,z,&_One); 1143 v += n*bs2; 1144 z += bs; 1145 } 1146 VecRestoreArray_Fast(xx,x); 1147 VecRestoreArray_Fast(zz,z); 1148 PLogFlops(2*a->nz*bs2 - a->m); 1149 PetscFunctionReturn(0); 1150 } 1151 1152 #undef __FUNC__ 1153 #define __FUNC__ "MatMultAdd_SeqBAIJ_1" 1154 static int MatMultAdd_SeqBAIJ_1(Mat A,Vec xx,Vec yy,Vec zz) 1155 { 1156 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1157 register Scalar *x,*y,*z,*v,sum; 1158 int mbs=a->mbs,i,*idx,*ii,n; 1159 1160 PetscFunctionBegin; 1161 VecGetArray_Fast(xx,x); 1162 VecGetArray_Fast(yy,y); 1163 VecGetArray_Fast(zz,z); 1164 1165 idx = a->j; 1166 v = a->a; 1167 ii = a->i; 1168 1169 for ( i=0; i<mbs; i++ ) { 1170 n = ii[1] - ii[0]; ii++; 1171 sum = y[i]; 1172 while (n--) sum += *v++ * x[*idx++]; 1173 z[i] = sum; 1174 } 1175 VecRestoreArray_Fast(xx,x); 1176 VecRestoreArray_Fast(yy,y); 1177 VecRestoreArray_Fast(zz,z); 1178 PLogFlops(2*a->nz); 1179 PetscFunctionReturn(0); 1180 } 1181 1182 #undef __FUNC__ 1183 #define __FUNC__ "MatMultAdd_SeqBAIJ_2" 1184 static int MatMultAdd_SeqBAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 1185 { 1186 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1187 register Scalar *x,*y,*z,*v,*xb,sum1,sum2; 1188 register Scalar x1,x2; 1189 int mbs=a->mbs,i,*idx,*ii,j,n; 1190 1191 PetscFunctionBegin; 1192 VecGetArray_Fast(xx,x); 1193 VecGetArray_Fast(yy,y); 1194 VecGetArray_Fast(zz,z); 1195 1196 idx = a->j; 1197 v = a->a; 1198 ii = a->i; 1199 1200 for ( i=0; i<mbs; i++ ) { 1201 n = ii[1] - ii[0]; ii++; 1202 sum1 = y[0]; sum2 = y[1]; 1203 for ( j=0; j<n; j++ ) { 1204 xb = x + 2*(*idx++); x1 = xb[0]; x2 = xb[1]; 1205 sum1 += v[0]*x1 + v[2]*x2; 1206 sum2 += v[1]*x1 + v[3]*x2; 1207 v += 4; 1208 } 1209 z[0] = sum1; z[1] = sum2; 1210 z += 2; y += 2; 1211 } 1212 VecRestoreArray_Fast(xx,x); 1213 VecRestoreArray_Fast(yy,y); 1214 VecRestoreArray_Fast(zz,z); 1215 PLogFlops(4*a->nz); 1216 PetscFunctionReturn(0); 1217 } 1218 1219 #undef __FUNC__ 1220 #define __FUNC__ "MatMultAdd_SeqBAIJ_3" 1221 static int MatMultAdd_SeqBAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 1222 { 1223 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1224 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,x1,x2,x3; 1225 int mbs=a->mbs,i,*idx,*ii,j,n; 1226 1227 PetscFunctionBegin; 1228 VecGetArray_Fast(xx,x); 1229 VecGetArray_Fast(yy,y); 1230 VecGetArray_Fast(zz,z); 1231 1232 idx = a->j; 1233 v = a->a; 1234 ii = a->i; 1235 1236 for ( i=0; i<mbs; i++ ) { 1237 n = ii[1] - ii[0]; ii++; 1238 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; 1239 for ( j=0; j<n; j++ ) { 1240 xb = x + 3*(*idx++); x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1241 sum1 += v[0]*x1 + v[3]*x2 + v[6]*x3; 1242 sum2 += v[1]*x1 + v[4]*x2 + v[7]*x3; 1243 sum3 += v[2]*x1 + v[5]*x2 + v[8]*x3; 1244 v += 9; 1245 } 1246 z[0] = sum1; z[1] = sum2; z[2] = sum3; 1247 z += 3; y += 3; 1248 } 1249 VecRestoreArray_Fast(xx,x); 1250 VecRestoreArray_Fast(yy,y); 1251 VecRestoreArray_Fast(zz,z); 1252 PLogFlops(18*a->nz); 1253 PetscFunctionReturn(0); 1254 } 1255 1256 #undef __FUNC__ 1257 #define __FUNC__ "MatMultAdd_SeqBAIJ_4" 1258 static int MatMultAdd_SeqBAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 1259 { 1260 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1261 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,x1,x2,x3,x4; 1262 int mbs=a->mbs,i,*idx,*ii; 1263 int j,n; 1264 1265 PetscFunctionBegin; 1266 VecGetArray_Fast(xx,x); 1267 VecGetArray_Fast(yy,y); 1268 VecGetArray_Fast(zz,z); 1269 1270 idx = a->j; 1271 v = a->a; 1272 ii = a->i; 1273 1274 for ( i=0; i<mbs; i++ ) { 1275 n = ii[1] - ii[0]; ii++; 1276 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; 1277 for ( j=0; j<n; j++ ) { 1278 xb = x + 4*(*idx++); 1279 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1280 sum1 += v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 1281 sum2 += v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 1282 sum3 += v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 1283 sum4 += v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 1284 v += 16; 1285 } 1286 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; 1287 z += 4; y += 4; 1288 } 1289 VecRestoreArray_Fast(xx,x); 1290 VecRestoreArray_Fast(yy,y); 1291 VecRestoreArray_Fast(zz,z); 1292 PLogFlops(32*a->nz); 1293 PetscFunctionReturn(0); 1294 } 1295 1296 #undef __FUNC__ 1297 #define __FUNC__ "MatMultAdd_SeqBAIJ_5" 1298 static int MatMultAdd_SeqBAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 1299 { 1300 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1301 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5; 1302 int mbs=a->mbs,i,*idx,*ii,j,n; 1303 1304 PetscFunctionBegin; 1305 VecGetArray_Fast(xx,x); 1306 VecGetArray_Fast(yy,y); 1307 VecGetArray_Fast(zz,z); 1308 1309 idx = a->j; 1310 v = a->a; 1311 ii = a->i; 1312 1313 for ( i=0; i<mbs; i++ ) { 1314 n = ii[1] - ii[0]; ii++; 1315 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; 1316 for ( j=0; j<n; j++ ) { 1317 xb = x + 5*(*idx++); 1318 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; 1319 sum1 += v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 1320 sum2 += v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 1321 sum3 += v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 1322 sum4 += v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 1323 sum5 += v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 1324 v += 25; 1325 } 1326 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; 1327 z += 5; y += 5; 1328 } 1329 VecRestoreArray_Fast(xx,x); 1330 VecRestoreArray_Fast(yy,y); 1331 VecRestoreArray_Fast(zz,z); 1332 PLogFlops(50*a->nz); 1333 PetscFunctionReturn(0); 1334 } 1335 1336 #undef __FUNC__ 1337 #define __FUNC__ "MatMultAdd_SeqBAIJ_7" 1338 static int MatMultAdd_SeqBAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1339 { 1340 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1341 register Scalar *x,*y,*z,*v,*xb,sum1,sum2,sum3,sum4,sum5,sum6,sum7; 1342 register Scalar x1,x2,x3,x4,x5,x6,x7; 1343 int mbs=a->mbs,i,*idx,*ii,j,n; 1344 1345 PetscFunctionBegin; 1346 VecGetArray_Fast(xx,x); 1347 VecGetArray_Fast(yy,y); 1348 VecGetArray_Fast(zz,z); 1349 1350 idx = a->j; 1351 v = a->a; 1352 ii = a->i; 1353 1354 for ( i=0; i<mbs; i++ ) { 1355 n = ii[1] - ii[0]; ii++; 1356 sum1 = y[0]; sum2 = y[1]; sum3 = y[2]; sum4 = y[3]; sum5 = y[4]; sum6 = y[5]; sum7 = y[6]; 1357 for ( j=0; j<n; j++ ) { 1358 xb = x + 7*(*idx++); 1359 x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; x5 = xb[4]; x6 = xb[5]; x7 = xb[6]; 1360 sum1 += v[0]*x1 + v[7]*x2 + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7; 1361 sum2 += v[1]*x1 + v[8]*x2 + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7; 1362 sum3 += v[2]*x1 + v[9]*x2 + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7; 1363 sum4 += v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7; 1364 sum5 += v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7; 1365 sum6 += v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7; 1366 sum7 += v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7; 1367 v += 49; 1368 } 1369 z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4; z[4] = sum5; z[5] = sum6; z[6] = sum7; 1370 z += 7; y += 7; 1371 } 1372 VecRestoreArray_Fast(xx,x); 1373 VecRestoreArray_Fast(yy,y); 1374 VecRestoreArray_Fast(zz,z); 1375 PLogFlops(98*a->nz); 1376 PetscFunctionReturn(0); 1377 } 1378 1379 1380 #undef __FUNC__ 1381 #define __FUNC__ "MatMultAdd_SeqBAIJ_N" 1382 static int MatMultAdd_SeqBAIJ_N(Mat A,Vec xx,Vec yy,Vec zz) 1383 { 1384 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1385 register Scalar *x,*z,*v,*xb; 1386 int mbs=a->mbs,i,*idx,*ii,bs=a->bs,j,n,bs2=a->bs2,ierr; 1387 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1388 1389 PetscFunctionBegin; 1390 if ( xx != yy) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1391 1392 VecGetArray_Fast(xx,x); 1393 VecGetArray_Fast(zz,z); 1394 1395 idx = a->j; 1396 v = a->a; 1397 ii = a->i; 1398 1399 1400 if (!a->mult_work) { 1401 k = PetscMax(a->m,a->n); 1402 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar));CHKPTRQ(a->mult_work); 1403 } 1404 work = a->mult_work; 1405 for ( i=0; i<mbs; i++ ) { 1406 n = ii[1] - ii[0]; ii++; 1407 ncols = n*bs; 1408 workt = work; 1409 for ( j=0; j<n; j++ ) { 1410 xb = x + bs*(*idx++); 1411 for ( k=0; k<bs; k++ ) workt[k] = xb[k]; 1412 workt += bs; 1413 } 1414 LAgemv_("N",&bs,&ncols,&_DOne,v,&bs,work,&_One,&_DOne,z,&_One); 1415 v += n*bs2; 1416 z += bs; 1417 } 1418 VecRestoreArray_Fast(xx,x); 1419 VecRestoreArray_Fast(zz,z); 1420 PLogFlops(2*a->nz*bs2 ); 1421 PetscFunctionReturn(0); 1422 } 1423 1424 #undef __FUNC__ 1425 #define __FUNC__ "MatMultTrans_SeqBAIJ" 1426 int MatMultTrans_SeqBAIJ(Mat A,Vec xx,Vec zz) 1427 { 1428 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1429 Scalar *xg,*zg,*zb; 1430 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1431 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1432 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1433 1434 1435 PetscFunctionBegin; 1436 VecGetArray_Fast(xx,xg); x = xg; 1437 VecGetArray_Fast(zz,zg); z = zg; 1438 PetscMemzero(z,N*sizeof(Scalar)); 1439 1440 idx = a->j; 1441 v = a->a; 1442 ii = a->i; 1443 1444 switch (bs) { 1445 case 1: 1446 for ( i=0; i<mbs; i++ ) { 1447 n = ii[1] - ii[0]; ii++; 1448 xb = x + i; x1 = xb[0]; 1449 ib = idx + ai[i]; 1450 for ( j=0; j<n; j++ ) { 1451 rval = ib[j]; 1452 z[rval] += *v++ * x1; 1453 } 1454 } 1455 break; 1456 case 2: 1457 for ( i=0; i<mbs; i++ ) { 1458 n = ii[1] - ii[0]; ii++; 1459 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1460 ib = idx + ai[i]; 1461 for ( j=0; j<n; j++ ) { 1462 rval = ib[j]*2; 1463 z[rval++] += v[0]*x1 + v[1]*x2; 1464 z[rval++] += v[2]*x1 + v[3]*x2; 1465 v += 4; 1466 } 1467 } 1468 break; 1469 case 3: 1470 for ( i=0; i<mbs; i++ ) { 1471 n = ii[1] - ii[0]; ii++; 1472 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1473 ib = idx + ai[i]; 1474 for ( j=0; j<n; j++ ) { 1475 rval = ib[j]*3; 1476 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1477 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1478 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1479 v += 9; 1480 } 1481 } 1482 break; 1483 case 4: 1484 for ( i=0; i<mbs; i++ ) { 1485 n = ii[1] - ii[0]; ii++; 1486 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1487 ib = idx + ai[i]; 1488 for ( j=0; j<n; j++ ) { 1489 rval = ib[j]*4; 1490 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1491 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1492 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1493 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1494 v += 16; 1495 } 1496 } 1497 break; 1498 case 5: 1499 for ( i=0; i<mbs; i++ ) { 1500 n = ii[1] - ii[0]; ii++; 1501 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1502 x4 = xb[3]; x5 = xb[4]; 1503 ib = idx + ai[i]; 1504 for ( j=0; j<n; j++ ) { 1505 rval = ib[j]*5; 1506 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1507 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1508 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1509 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1510 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1511 v += 25; 1512 } 1513 } 1514 break; 1515 /* block sizes larger then 5 by 5 are handled by BLAS */ 1516 default: { 1517 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1518 if (!a->mult_work) { 1519 k = PetscMax(a->m,a->n); 1520 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1521 CHKPTRQ(a->mult_work); 1522 } 1523 work = a->mult_work; 1524 for ( i=0; i<mbs; i++ ) { 1525 n = ii[1] - ii[0]; ii++; 1526 ncols = n*bs; 1527 PetscMemzero(work,ncols*sizeof(Scalar)); 1528 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1529 v += n*bs2; 1530 x += bs; 1531 workt = work; 1532 for ( j=0; j<n; j++ ) { 1533 zb = z + bs*(*idx++); 1534 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1535 workt += bs; 1536 } 1537 } 1538 } 1539 } 1540 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1541 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1542 PLogFlops(2*a->nz*a->bs2 - a->n); 1543 PetscFunctionReturn(0); 1544 } 1545 1546 #undef __FUNC__ 1547 #define __FUNC__ "MatMultTransAdd_SeqBAIJ" 1548 int MatMultTransAdd_SeqBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1549 { 1550 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1551 Scalar *xg,*zg,*zb; 1552 register Scalar *x,*z,*v,*xb,x1,x2,x3,x4,x5; 1553 int mbs=a->mbs,i,*idx,*ii,*ai=a->i,rval,N=a->n; 1554 int bs=a->bs,j,n,bs2=a->bs2,*ib,ierr; 1555 1556 PetscFunctionBegin; 1557 VecGetArray_Fast(xx,xg); x = xg; 1558 VecGetArray_Fast(zz,zg); z = zg; 1559 1560 if ( yy != zz ) { ierr = VecCopy(yy,zz); CHKERRQ(ierr); } 1561 else PetscMemzero(z,N*sizeof(Scalar)); 1562 1563 idx = a->j; 1564 v = a->a; 1565 ii = a->i; 1566 1567 switch (bs) { 1568 case 1: 1569 for ( i=0; i<mbs; i++ ) { 1570 n = ii[1] - ii[0]; ii++; 1571 xb = x + i; x1 = xb[0]; 1572 ib = idx + ai[i]; 1573 for ( j=0; j<n; j++ ) { 1574 rval = ib[j]; 1575 z[rval] += *v++ * x1; 1576 } 1577 } 1578 break; 1579 case 2: 1580 for ( i=0; i<mbs; i++ ) { 1581 n = ii[1] - ii[0]; ii++; 1582 xb = x + 2*i; x1 = xb[0]; x2 = xb[1]; 1583 ib = idx + ai[i]; 1584 for ( j=0; j<n; j++ ) { 1585 rval = ib[j]*2; 1586 z[rval++] += v[0]*x1 + v[1]*x2; 1587 z[rval++] += v[2]*x1 + v[3]*x2; 1588 v += 4; 1589 } 1590 } 1591 break; 1592 case 3: 1593 for ( i=0; i<mbs; i++ ) { 1594 n = ii[1] - ii[0]; ii++; 1595 xb = x + 3*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1596 ib = idx + ai[i]; 1597 for ( j=0; j<n; j++ ) { 1598 rval = ib[j]*3; 1599 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3; 1600 z[rval++] += v[3]*x1 + v[4]*x2 + v[5]*x3; 1601 z[rval++] += v[6]*x1 + v[7]*x2 + v[8]*x3; 1602 v += 9; 1603 } 1604 } 1605 break; 1606 case 4: 1607 for ( i=0; i<mbs; i++ ) { 1608 n = ii[1] - ii[0]; ii++; 1609 xb = x + 4*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3]; 1610 ib = idx + ai[i]; 1611 for ( j=0; j<n; j++ ) { 1612 rval = ib[j]*4; 1613 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4; 1614 z[rval++] += v[4]*x1 + v[5]*x2 + v[6]*x3 + v[7]*x4; 1615 z[rval++] += v[8]*x1 + v[9]*x2 + v[10]*x3 + v[11]*x4; 1616 z[rval++] += v[12]*x1 + v[13]*x2 + v[14]*x3 + v[15]*x4; 1617 v += 16; 1618 } 1619 } 1620 break; 1621 case 5: 1622 for ( i=0; i<mbs; i++ ) { 1623 n = ii[1] - ii[0]; ii++; 1624 xb = x + 5*i; x1 = xb[0]; x2 = xb[1]; x3 = xb[2]; 1625 x4 = xb[3]; x5 = xb[4]; 1626 ib = idx + ai[i]; 1627 for ( j=0; j<n; j++ ) { 1628 rval = ib[j]*5; 1629 z[rval++] += v[0]*x1 + v[1]*x2 + v[2]*x3 + v[3]*x4 + v[4]*x5; 1630 z[rval++] += v[5]*x1 + v[6]*x2 + v[7]*x3 + v[8]*x4 + v[9]*x5; 1631 z[rval++] += v[10]*x1 + v[11]*x2 + v[12]*x3 + v[13]*x4 + v[14]*x5; 1632 z[rval++] += v[15]*x1 + v[16]*x2 + v[17]*x3 + v[18]*x4 + v[19]*x5; 1633 z[rval++] += v[20]*x1 + v[21]*x2 + v[22]*x3 + v[23]*x4 + v[24]*x5; 1634 v += 25; 1635 } 1636 } 1637 break; 1638 /* block sizes larger then 5 by 5 are handled by BLAS */ 1639 default: { 1640 int _One = 1,ncols,k; Scalar _DOne = 1.0, *work,*workt; 1641 if (!a->mult_work) { 1642 k = PetscMax(a->m,a->n); 1643 a->mult_work = (Scalar *) PetscMalloc(k*sizeof(Scalar)); 1644 CHKPTRQ(a->mult_work); 1645 } 1646 work = a->mult_work; 1647 for ( i=0; i<mbs; i++ ) { 1648 n = ii[1] - ii[0]; ii++; 1649 ncols = n*bs; 1650 PetscMemzero(work,ncols*sizeof(Scalar)); 1651 LAgemv_("T",&bs,&ncols,&_DOne,v,&bs,x,&_One,&_DOne,work,&_One); 1652 v += n*bs2; 1653 x += bs; 1654 workt = work; 1655 for ( j=0; j<n; j++ ) { 1656 zb = z + bs*(*idx++); 1657 for ( k=0; k<bs; k++ ) zb[k] += workt[k] ; 1658 workt += bs; 1659 } 1660 } 1661 } 1662 } 1663 ierr = VecRestoreArray(xx,&xg); CHKERRQ(ierr); 1664 ierr = VecRestoreArray(zz,&zg); CHKERRQ(ierr); 1665 PLogFlops(2*a->nz*a->bs2); 1666 PetscFunctionReturn(0); 1667 } 1668 1669 #undef __FUNC__ 1670 #define __FUNC__ "MatGetInfo_SeqBAIJ" 1671 int MatGetInfo_SeqBAIJ(Mat A,MatInfoType flag,MatInfo *info) 1672 { 1673 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1674 1675 PetscFunctionBegin; 1676 info->rows_global = (double)a->m; 1677 info->columns_global = (double)a->n; 1678 info->rows_local = (double)a->m; 1679 info->columns_local = (double)a->n; 1680 info->block_size = a->bs2; 1681 info->nz_allocated = a->maxnz; 1682 info->nz_used = a->bs2*a->nz; 1683 info->nz_unneeded = (double)(info->nz_allocated - info->nz_used); 1684 /* if (info->nz_unneeded != A->info.nz_unneeded) 1685 printf("space descrepancy: maxnz-nz = %d, nz_unneeded = %d\n",(int)info->nz_unneeded,(int)A->info.nz_unneeded); */ 1686 info->assemblies = A->num_ass; 1687 info->mallocs = a->reallocs; 1688 info->memory = A->mem; 1689 if (A->factor) { 1690 info->fill_ratio_given = A->info.fill_ratio_given; 1691 info->fill_ratio_needed = A->info.fill_ratio_needed; 1692 info->factor_mallocs = A->info.factor_mallocs; 1693 } else { 1694 info->fill_ratio_given = 0; 1695 info->fill_ratio_needed = 0; 1696 info->factor_mallocs = 0; 1697 } 1698 PetscFunctionReturn(0); 1699 } 1700 1701 #undef __FUNC__ 1702 #define __FUNC__ "MatEqual_SeqBAIJ" 1703 int MatEqual_SeqBAIJ(Mat A,Mat B, PetscTruth* flg) 1704 { 1705 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 1706 1707 PetscFunctionBegin; 1708 if (B->type !=MATSEQBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1709 1710 /* If the matrix/block dimensions are not equal, or no of nonzeros or shift */ 1711 if ((a->m != b->m) || (a->n !=b->n) || (a->bs != b->bs)|| (a->nz != b->nz)) { 1712 *flg = PETSC_FALSE; PetscFunctionReturn(0); 1713 } 1714 1715 /* if the a->i are the same */ 1716 if (PetscMemcmp(a->i,b->i, (a->mbs+1)*sizeof(int))) { 1717 *flg = PETSC_FALSE; PetscFunctionReturn(0); 1718 } 1719 1720 /* if a->j are the same */ 1721 if (PetscMemcmp(a->j,b->j,(a->nz)*sizeof(int))) { 1722 *flg = PETSC_FALSE; PetscFunctionReturn(0); 1723 } 1724 1725 /* if a->a are the same */ 1726 if (PetscMemcmp(a->a, b->a,(a->nz)*(a->bs)*(a->bs)*sizeof(Scalar))) { 1727 *flg = PETSC_FALSE; PetscFunctionReturn(0); 1728 } 1729 *flg = PETSC_TRUE; 1730 PetscFunctionReturn(0); 1731 1732 } 1733 1734 #undef __FUNC__ 1735 #define __FUNC__ "MatGetDiagonal_SeqBAIJ" 1736 int MatGetDiagonal_SeqBAIJ(Mat A,Vec v) 1737 { 1738 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1739 int i,j,k,n,row,bs,*ai,*aj,ambs,bs2; 1740 Scalar *x, zero = 0.0,*aa,*aa_j; 1741 1742 PetscFunctionBegin; 1743 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Not for factored matrix"); 1744 bs = a->bs; 1745 aa = a->a; 1746 ai = a->i; 1747 aj = a->j; 1748 ambs = a->mbs; 1749 bs2 = a->bs2; 1750 1751 VecSet(&zero,v); 1752 VecGetArray_Fast(v,x); VecGetLocalSize_Fast(v,n); 1753 if (n != a->m) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Nonconforming matrix and vector"); 1754 for ( i=0; i<ambs; i++ ) { 1755 for ( j=ai[i]; j<ai[i+1]; j++ ) { 1756 if (aj[j] == i) { 1757 row = i*bs; 1758 aa_j = aa+j*bs2; 1759 for (k=0; k<bs2; k+=(bs+1),row++) x[row] = aa_j[k]; 1760 break; 1761 } 1762 } 1763 } 1764 PetscFunctionReturn(0); 1765 } 1766 1767 #undef __FUNC__ 1768 #define __FUNC__ "MatDiagonalScale_SeqBAIJ" 1769 int MatDiagonalScale_SeqBAIJ(Mat A,Vec ll,Vec rr) 1770 { 1771 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1772 Scalar *l,*r,x,*v,*aa,*li,*ri; 1773 int i,j,k,lm,rn,M,m,n,*ai,*aj,mbs,tmp,bs,bs2; 1774 1775 PetscFunctionBegin; 1776 ai = a->i; 1777 aj = a->j; 1778 aa = a->a; 1779 m = a->m; 1780 n = a->n; 1781 bs = a->bs; 1782 mbs = a->mbs; 1783 bs2 = a->bs2; 1784 if (ll) { 1785 VecGetArray_Fast(ll,l); VecGetLocalSize_Fast(ll,lm); 1786 if (lm != m) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Left scaling vector wrong length"); 1787 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1788 M = ai[i+1] - ai[i]; 1789 li = l + i*bs; 1790 v = aa + bs2*ai[i]; 1791 for ( j=0; j<M; j++ ) { /* for each block */ 1792 for ( k=0; k<bs2; k++ ) { 1793 (*v++) *= li[k%bs]; 1794 } 1795 } 1796 } 1797 } 1798 1799 if (rr) { 1800 VecGetArray_Fast(rr,r); VecGetLocalSize_Fast(rr,rn); 1801 if (rn != n) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Right scaling vector wrong length"); 1802 for ( i=0; i<mbs; i++ ) { /* for each block row */ 1803 M = ai[i+1] - ai[i]; 1804 v = aa + bs2*ai[i]; 1805 for ( j=0; j<M; j++ ) { /* for each block */ 1806 ri = r + bs*aj[ai[i]+j]; 1807 for ( k=0; k<bs; k++ ) { 1808 x = ri[k]; 1809 for ( tmp=0; tmp<bs; tmp++ ) (*v++) *= x; 1810 } 1811 } 1812 } 1813 } 1814 PetscFunctionReturn(0); 1815 } 1816 1817 1818 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,double,Mat*); 1819 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,double); 1820 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1821 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatGetSubMatrixCall,Mat*); 1822 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatGetSubMatrixCall,Mat**); 1823 1824 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 1825 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 1826 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 1827 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 1828 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 1829 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 1830 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec); 1831 1832 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 1833 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 1834 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 1835 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 1836 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 1837 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 1838 1839 #undef __FUNC__ 1840 #define __FUNC__ "MatNorm_SeqBAIJ" 1841 int MatNorm_SeqBAIJ(Mat A,NormType type,double *norm) 1842 { 1843 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1844 Scalar *v = a->a; 1845 double sum = 0.0; 1846 int i,nz=a->nz,bs2=a->bs2; 1847 1848 PetscFunctionBegin; 1849 if (type == NORM_FROBENIUS) { 1850 for (i=0; i< bs2*nz; i++ ) { 1851 #if defined(USE_PETSC_COMPLEX) 1852 sum += real(conj(*v)*(*v)); v++; 1853 #else 1854 sum += (*v)*(*v); v++; 1855 #endif 1856 } 1857 *norm = sqrt(sum); 1858 } 1859 else { 1860 SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 1861 } 1862 PetscFunctionReturn(0); 1863 } 1864 1865 extern int MatSolve_SeqBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 1866 /* 1867 note: This can only work for identity for row and col. It would 1868 be good to check this and otherwise generate an error. 1869 */ 1870 #undef __FUNC__ 1871 #define __FUNC__ "MatILUFactor_SeqBAIJ" 1872 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,double efill,int fill) 1873 { 1874 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1875 Mat outA; 1876 int ierr; 1877 1878 PetscFunctionBegin; 1879 if (fill != 0) SETERRQ(PETSC_ERR_SUP,0,"Only fill=0 supported"); 1880 1881 outA = inA; 1882 inA->factor = FACTOR_LU; 1883 a->row = row; 1884 a->col = col; 1885 1886 if (!a->solve_work) { 1887 a->solve_work = (Scalar *) PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1888 PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar)); 1889 } 1890 1891 if (!a->diag) { 1892 ierr = MatMarkDiag_SeqBAIJ(inA); CHKERRQ(ierr); 1893 } 1894 ierr = MatLUFactorNumeric(inA,&outA); CHKERRQ(ierr); 1895 1896 /* 1897 Blocksize 4 has a special faster solver for ILU(0) factorization 1898 with natural ordering 1899 */ 1900 if (a->bs == 4) { 1901 inA->ops.solve = MatSolve_SeqBAIJ_4_NaturalOrdering; 1902 } 1903 1904 PetscFunctionReturn(0); 1905 } 1906 1907 #undef __FUNC__ 1908 #define __FUNC__ "MatScale_SeqBAIJ" 1909 int MatScale_SeqBAIJ(Scalar *alpha,Mat inA) 1910 { 1911 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) inA->data; 1912 int one = 1, totalnz = a->bs2*a->nz; 1913 1914 PetscFunctionBegin; 1915 BLscal_( &totalnz, alpha, a->a, &one ); 1916 PLogFlops(totalnz); 1917 PetscFunctionReturn(0); 1918 } 1919 1920 #undef __FUNC__ 1921 #define __FUNC__ "MatGetValues_SeqBAIJ" 1922 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 1923 { 1924 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 1925 int *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j; 1926 int *ai = a->i, *ailen = a->ilen; 1927 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 1928 Scalar *ap, *aa = a->a, zero = 0.0; 1929 1930 PetscFunctionBegin; 1931 for ( k=0; k<m; k++ ) { /* loop over rows */ 1932 row = im[k]; brow = row/bs; 1933 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 1934 if (row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 1935 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 1936 nrow = ailen[brow]; 1937 for ( l=0; l<n; l++ ) { /* loop over columns */ 1938 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 1939 if (in[l] >= a->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 1940 col = in[l] ; 1941 bcol = col/bs; 1942 cidx = col%bs; 1943 ridx = row%bs; 1944 high = nrow; 1945 low = 0; /* assume unsorted */ 1946 while (high-low > 5) { 1947 t = (low+high)/2; 1948 if (rp[t] > bcol) high = t; 1949 else low = t; 1950 } 1951 for ( i=low; i<high; i++ ) { 1952 if (rp[i] > bcol) break; 1953 if (rp[i] == bcol) { 1954 *v++ = ap[bs2*i+bs*cidx+ridx]; 1955 goto finished; 1956 } 1957 } 1958 *v++ = zero; 1959 finished:; 1960 } 1961 } 1962 PetscFunctionReturn(0); 1963 } 1964 1965 #undef __FUNC__ 1966 #define __FUNC__ "MatGetBlockSize_SeqBAIJ" 1967 int MatGetBlockSize_SeqBAIJ(Mat mat, int *bs) 1968 { 1969 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *) mat->data; 1970 1971 PetscFunctionBegin; 1972 *bs = baij->bs; 1973 PetscFunctionReturn(0); 1974 } 1975 1976 /* idx should be of length atlease bs */ 1977 #undef __FUNC__ 1978 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Block" 1979 static int MatZeroRows_SeqBAIJ_Check_Block(int *idx, int bs, PetscTruth *flg) 1980 { 1981 int i,row; 1982 1983 PetscFunctionBegin; 1984 row = idx[0]; 1985 if (row%bs!=0) { *flg = PETSC_FALSE; PetscFunctionReturn(0); } 1986 1987 for ( i=1; i<bs; i++ ) { 1988 if (row+i != idx[i]) { *flg = PETSC_FALSE; PetscFunctionReturn(0); } 1989 } 1990 *flg = PETSC_TRUE; 1991 PetscFunctionReturn(0); 1992 } 1993 1994 #undef __FUNC__ 1995 #define __FUNC__ "MatZeroRows_SeqBAIJ" 1996 int MatZeroRows_SeqBAIJ(Mat A,IS is, Scalar *diag) 1997 { 1998 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1999 IS is_local; 2000 int ierr,i,j,count,m=baij->m,is_n,*is_idx,*rows,bs=baij->bs,bs2=baij->bs2; 2001 PetscTruth flg; 2002 Scalar zero = 0.0,*aa; 2003 2004 PetscFunctionBegin; 2005 /* Make a copy of the IS and sort it */ 2006 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 2007 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 2008 ierr = ISCreateGeneral(A->comm,is_n,is_idx,&is_local); CHKERRQ(ierr); 2009 ierr = ISSort(is_local); CHKERRQ(ierr); 2010 ierr = ISGetIndices(is_local,&rows); CHKERRQ(ierr); 2011 2012 i = 0; 2013 while (i < is_n) { 2014 if (rows[i]<0 || rows[i]>m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"row out of range"); 2015 flg = PETSC_FALSE; 2016 if (i+bs <= is_n) {ierr = MatZeroRows_SeqBAIJ_Check_Block(rows+i,bs,&flg); CHKERRQ(ierr); } 2017 if (flg) { /* There exists a block of rows to be Zerowed */ 2018 baij->ilen[rows[i]/bs] = 0; 2019 i += bs; 2020 } else { /* Zero out only the requested row */ 2021 count = (baij->i[rows[i]/bs +1] - baij->i[rows[i]/bs])*bs; 2022 aa = baij->a + baij->i[rows[i]/bs]*bs2 + (rows[i]%bs); 2023 for ( j=0; j<count; j++ ) { 2024 aa[0] = zero; 2025 aa+=bs; 2026 } 2027 i++; 2028 } 2029 } 2030 if (diag) { 2031 for ( j=0; j<is_n; j++ ) { 2032 ierr = (*A->ops.setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 2033 } 2034 } 2035 ierr = ISRestoreIndices(is,&is_idx); CHKERRQ(ierr); 2036 ierr = ISRestoreIndices(is_local,&rows); CHKERRQ(ierr); 2037 ierr = ISDestroy(is_local); CHKERRQ(ierr); 2038 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); 2039 2040 PetscFunctionReturn(0); 2041 } 2042 2043 #undef __FUNC__ 2044 #define __FUNC__ "MatPrintHelp_SeqBAIJ" 2045 int MatPrintHelp_SeqBAIJ(Mat A) 2046 { 2047 static int called = 0; 2048 MPI_Comm comm = A->comm; 2049 2050 PetscFunctionBegin; 2051 if (called) {PetscFunctionReturn(0);} else called = 1; 2052 PetscPrintf(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n"); 2053 PetscPrintf(comm," -mat_block_size <block_size>\n"); 2054 PetscFunctionReturn(0); 2055 } 2056 2057 /* -------------------------------------------------------------------*/ 2058 static struct _MatOps MatOps = {MatSetValues_SeqBAIJ, 2059 MatGetRow_SeqBAIJ,MatRestoreRow_SeqBAIJ, 2060 MatMult_SeqBAIJ_N,MatMultAdd_SeqBAIJ_N, 2061 MatMultTrans_SeqBAIJ,MatMultTransAdd_SeqBAIJ, 2062 MatSolve_SeqBAIJ_N,0, 2063 0,0, 2064 MatLUFactor_SeqBAIJ,0, 2065 0, 2066 MatTranspose_SeqBAIJ, 2067 MatGetInfo_SeqBAIJ,MatEqual_SeqBAIJ, 2068 MatGetDiagonal_SeqBAIJ,MatDiagonalScale_SeqBAIJ,MatNorm_SeqBAIJ, 2069 0,MatAssemblyEnd_SeqBAIJ, 2070 0, 2071 MatSetOption_SeqBAIJ,MatZeroEntries_SeqBAIJ,MatZeroRows_SeqBAIJ, 2072 MatLUFactorSymbolic_SeqBAIJ,MatLUFactorNumeric_SeqBAIJ_N,0,0, 2073 MatGetSize_SeqBAIJ,MatGetSize_SeqBAIJ,MatGetOwnershipRange_SeqBAIJ, 2074 MatILUFactorSymbolic_SeqBAIJ,0, 2075 0,0, 2076 MatConvertSameType_SeqBAIJ,0,0, 2077 MatILUFactor_SeqBAIJ,0,0, 2078 MatGetSubMatrices_SeqBAIJ,MatIncreaseOverlap_SeqBAIJ, 2079 MatGetValues_SeqBAIJ,0, 2080 MatPrintHelp_SeqBAIJ,MatScale_SeqBAIJ, 2081 0,0,0,MatGetBlockSize_SeqBAIJ, 2082 MatGetRowIJ_SeqBAIJ, 2083 MatRestoreRowIJ_SeqBAIJ, 2084 0,0,0,0,0,0, 2085 MatSetValuesBlocked_SeqBAIJ}; 2086 2087 #undef __FUNC__ 2088 #define __FUNC__ "MatCreateSeqBAIJ" 2089 /*@C 2090 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 2091 compressed row) format. For good matrix assembly performance the 2092 user should preallocate the matrix storage by setting the parameter nz 2093 (or the array nzz). By setting these parameters accurately, performance 2094 during matrix assembly can be increased by more than a factor of 50. 2095 2096 Input Parameters: 2097 . comm - MPI communicator, set to PETSC_COMM_SELF 2098 . bs - size of block 2099 . m - number of rows 2100 . n - number of columns 2101 . nz - number of block nonzeros per block row (same for all rows) 2102 . nzz - array containing the number of block nonzeros in the various block rows 2103 (possibly different for each block row) or PETSC_NULL 2104 2105 Output Parameter: 2106 . A - the matrix 2107 2108 Options Database Keys: 2109 $ -mat_no_unroll - uses code that does not unroll the loops in the 2110 $ block calculations (much slower) 2111 $ -mat_block_size - size of the blocks to use 2112 2113 Notes: 2114 The block AIJ format is fully compatible with standard Fortran 77 2115 storage. That is, the stored row and column indices can begin at 2116 either one (as in Fortran) or zero. See the users' manual for details. 2117 2118 Specify the preallocated storage with either nz or nnz (not both). 2119 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 2120 allocation. For additional details, see the users manual chapter on 2121 matrices. 2122 2123 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues() 2124 @*/ 2125 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz, Mat *A) 2126 { 2127 Mat B; 2128 Mat_SeqBAIJ *b; 2129 int i,len,ierr,flg,mbs=m/bs,nbs=n/bs,bs2=bs*bs,size; 2130 2131 PetscFunctionBegin; 2132 MPI_Comm_size(comm,&size); 2133 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"Comm must be of size 1"); 2134 2135 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,&flg);CHKERRQ(ierr); 2136 2137 if (mbs*bs!=m || nbs*bs!=n) { 2138 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Number rows, cols must be divisible by blocksize"); 2139 } 2140 2141 *A = 0; 2142 PetscHeaderCreate(B,_p_Mat,MAT_COOKIE,MATSEQBAIJ,comm,MatDestroy,MatView); 2143 PLogObjectCreate(B); 2144 B->data = (void *) (b = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(b); 2145 PetscMemzero(b,sizeof(Mat_SeqBAIJ)); 2146 PetscMemcpy(&B->ops,&MatOps,sizeof(struct _MatOps)); 2147 ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg); CHKERRQ(ierr); 2148 if (!flg) { 2149 switch (bs) { 2150 case 1: 2151 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2152 B->ops.solve = MatSolve_SeqBAIJ_1; 2153 B->ops.mult = MatMult_SeqBAIJ_1; 2154 B->ops.multadd = MatMultAdd_SeqBAIJ_1; 2155 break; 2156 case 2: 2157 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2158 B->ops.solve = MatSolve_SeqBAIJ_2; 2159 B->ops.mult = MatMult_SeqBAIJ_2; 2160 B->ops.multadd = MatMultAdd_SeqBAIJ_2; 2161 break; 2162 case 3: 2163 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2164 B->ops.solve = MatSolve_SeqBAIJ_3; 2165 B->ops.mult = MatMult_SeqBAIJ_3; 2166 B->ops.multadd = MatMultAdd_SeqBAIJ_3; 2167 break; 2168 case 4: 2169 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2170 B->ops.solve = MatSolve_SeqBAIJ_4; 2171 B->ops.mult = MatMult_SeqBAIJ_4; 2172 B->ops.multadd = MatMultAdd_SeqBAIJ_4; 2173 break; 2174 case 5: 2175 B->ops.lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2176 B->ops.solve = MatSolve_SeqBAIJ_5; 2177 B->ops.mult = MatMult_SeqBAIJ_5; 2178 B->ops.multadd = MatMultAdd_SeqBAIJ_5; 2179 break; 2180 case 7: 2181 B->ops.mult = MatMult_SeqBAIJ_7; 2182 B->ops.solve = MatSolve_SeqBAIJ_7; 2183 B->ops.multadd = MatMultAdd_SeqBAIJ_7; 2184 break; 2185 } 2186 } 2187 B->destroy = MatDestroy_SeqBAIJ; 2188 B->view = MatView_SeqBAIJ; 2189 B->factor = 0; 2190 B->lupivotthreshold = 1.0; 2191 B->mapping = 0; 2192 b->row = 0; 2193 b->col = 0; 2194 b->reallocs = 0; 2195 2196 b->m = m; B->m = m; B->M = m; 2197 b->n = n; B->n = n; B->N = n; 2198 b->mbs = mbs; 2199 b->nbs = nbs; 2200 b->imax = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(b->imax); 2201 if (nnz == PETSC_NULL) { 2202 if (nz == PETSC_DEFAULT) nz = 5; 2203 else if (nz <= 0) nz = 1; 2204 for ( i=0; i<mbs; i++ ) b->imax[i] = nz; 2205 nz = nz*mbs; 2206 } else { 2207 nz = 0; 2208 for ( i=0; i<mbs; i++ ) {b->imax[i] = nnz[i]; nz += nnz[i];} 2209 } 2210 2211 /* allocate the matrix space */ 2212 len = nz*sizeof(int) + nz*bs2*sizeof(Scalar) + (b->m+1)*sizeof(int); 2213 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 2214 PetscMemzero(b->a,nz*bs2*sizeof(Scalar)); 2215 b->j = (int *) (b->a + nz*bs2); 2216 PetscMemzero(b->j,nz*sizeof(int)); 2217 b->i = b->j + nz; 2218 b->singlemalloc = 1; 2219 2220 b->i[0] = 0; 2221 for (i=1; i<mbs+1; i++) { 2222 b->i[i] = b->i[i-1] + b->imax[i-1]; 2223 } 2224 2225 /* b->ilen will count nonzeros in each block row so far. */ 2226 b->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); 2227 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2228 for ( i=0; i<mbs; i++ ) { b->ilen[i] = 0;} 2229 2230 b->bs = bs; 2231 b->bs2 = bs2; 2232 b->mbs = mbs; 2233 b->nz = 0; 2234 b->maxnz = nz*bs2; 2235 b->sorted = 0; 2236 b->roworiented = 1; 2237 b->nonew = 0; 2238 b->diag = 0; 2239 b->solve_work = 0; 2240 b->mult_work = 0; 2241 b->spptr = 0; 2242 B->info.nz_unneeded = (double)b->maxnz; 2243 2244 *A = B; 2245 ierr = OptionsHasName(PETSC_NULL,"-help", &flg); CHKERRQ(ierr); 2246 if (flg) {ierr = MatPrintHelp(B); CHKERRQ(ierr); } 2247 PetscFunctionReturn(0); 2248 } 2249 2250 #undef __FUNC__ 2251 #define __FUNC__ "MatConvertSameType_SeqBAIJ" 2252 int MatConvertSameType_SeqBAIJ(Mat A,Mat *B,int cpvalues) 2253 { 2254 Mat C; 2255 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ *) A->data; 2256 int i,len, mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 2257 2258 PetscFunctionBegin; 2259 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,0,"Corrupt matrix"); 2260 2261 *B = 0; 2262 PetscHeaderCreate(C,_p_Mat,MAT_COOKIE,MATSEQBAIJ,A->comm,MatDestroy,MatView); 2263 PLogObjectCreate(C); 2264 C->data = (void *) (c = PetscNew(Mat_SeqBAIJ)); CHKPTRQ(c); 2265 PetscMemcpy(&C->ops,&A->ops,sizeof(struct _MatOps)); 2266 C->destroy = MatDestroy_SeqBAIJ; 2267 C->view = MatView_SeqBAIJ; 2268 C->factor = A->factor; 2269 c->row = 0; 2270 c->col = 0; 2271 C->assembled = PETSC_TRUE; 2272 2273 c->m = C->m = a->m; 2274 c->n = C->n = a->n; 2275 C->M = a->m; 2276 C->N = a->n; 2277 2278 c->bs = a->bs; 2279 c->bs2 = a->bs2; 2280 c->mbs = a->mbs; 2281 c->nbs = a->nbs; 2282 2283 c->imax = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->imax); 2284 c->ilen = (int *) PetscMalloc((mbs+1)*sizeof(int)); CHKPTRQ(c->ilen); 2285 for ( i=0; i<mbs; i++ ) { 2286 c->imax[i] = a->imax[i]; 2287 c->ilen[i] = a->ilen[i]; 2288 } 2289 2290 /* allocate the matrix space */ 2291 c->singlemalloc = 1; 2292 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(Scalar) + sizeof(int)); 2293 c->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(c->a); 2294 c->j = (int *) (c->a + nz*bs2); 2295 c->i = c->j + nz; 2296 PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int)); 2297 if (mbs > 0) { 2298 PetscMemcpy(c->j,a->j,nz*sizeof(int)); 2299 if (cpvalues == COPY_VALUES) { 2300 PetscMemcpy(c->a,a->a,bs2*nz*sizeof(Scalar)); 2301 } 2302 } 2303 2304 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2305 c->sorted = a->sorted; 2306 c->roworiented = a->roworiented; 2307 c->nonew = a->nonew; 2308 2309 if (a->diag) { 2310 c->diag = (int *) PetscMalloc( (mbs+1)*sizeof(int) ); CHKPTRQ(c->diag); 2311 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 2312 for ( i=0; i<mbs; i++ ) { 2313 c->diag[i] = a->diag[i]; 2314 } 2315 } 2316 else c->diag = 0; 2317 c->nz = a->nz; 2318 c->maxnz = a->maxnz; 2319 c->solve_work = 0; 2320 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 2321 c->mult_work = 0; 2322 *B = C; 2323 PetscFunctionReturn(0); 2324 } 2325 2326 #undef __FUNC__ 2327 #define __FUNC__ "MatLoad_SeqBAIJ" 2328 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 2329 { 2330 Mat_SeqBAIJ *a; 2331 Mat B; 2332 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1,flg; 2333 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 2334 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 2335 int *masked, nmask,tmp,bs2,ishift; 2336 Scalar *aa; 2337 MPI_Comm comm = ((PetscObject) viewer)->comm; 2338 2339 PetscFunctionBegin; 2340 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,&flg);CHKERRQ(ierr); 2341 bs2 = bs*bs; 2342 2343 MPI_Comm_size(comm,&size); 2344 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"view must have one processor"); 2345 ierr = ViewerBinaryGetDescriptor(viewer,&fd); CHKERRQ(ierr); 2346 ierr = PetscBinaryRead(fd,header,4,PETSC_INT); CHKERRQ(ierr); 2347 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not Mat object"); 2348 M = header[1]; N = header[2]; nz = header[3]; 2349 2350 if (header[3] < 0) { 2351 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as SeqBAIJ"); 2352 } 2353 2354 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 2355 2356 /* 2357 This code adds extra rows to make sure the number of rows is 2358 divisible by the blocksize 2359 */ 2360 mbs = M/bs; 2361 extra_rows = bs - M + bs*(mbs); 2362 if (extra_rows == bs) extra_rows = 0; 2363 else mbs++; 2364 if (extra_rows) { 2365 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 2366 } 2367 2368 /* read in row lengths */ 2369 rowlengths = (int*) PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 2370 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT); CHKERRQ(ierr); 2371 for ( i=0; i<extra_rows; i++ ) rowlengths[M+i] = 1; 2372 2373 /* read in column indices */ 2374 jj = (int*) PetscMalloc( (nz+extra_rows)*sizeof(int) ); CHKPTRQ(jj); 2375 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT); CHKERRQ(ierr); 2376 for ( i=0; i<extra_rows; i++ ) jj[nz+i] = M+i; 2377 2378 /* loop over row lengths determining block row lengths */ 2379 browlengths = (int *) PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 2380 PetscMemzero(browlengths,mbs*sizeof(int)); 2381 mask = (int *) PetscMalloc( 2*mbs*sizeof(int) ); CHKPTRQ(mask); 2382 PetscMemzero(mask,mbs*sizeof(int)); 2383 masked = mask + mbs; 2384 rowcount = 0; nzcount = 0; 2385 for ( i=0; i<mbs; i++ ) { 2386 nmask = 0; 2387 for ( j=0; j<bs; j++ ) { 2388 kmax = rowlengths[rowcount]; 2389 for ( k=0; k<kmax; k++ ) { 2390 tmp = jj[nzcount++]/bs; 2391 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 2392 } 2393 rowcount++; 2394 } 2395 browlengths[i] += nmask; 2396 /* zero out the mask elements we set */ 2397 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2398 } 2399 2400 /* create our matrix */ 2401 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 2402 B = *A; 2403 a = (Mat_SeqBAIJ *) B->data; 2404 2405 /* set matrix "i" values */ 2406 a->i[0] = 0; 2407 for ( i=1; i<= mbs; i++ ) { 2408 a->i[i] = a->i[i-1] + browlengths[i-1]; 2409 a->ilen[i-1] = browlengths[i-1]; 2410 } 2411 a->nz = 0; 2412 for ( i=0; i<mbs; i++ ) a->nz += browlengths[i]; 2413 2414 /* read in nonzero values */ 2415 aa = (Scalar *) PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 2416 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR); CHKERRQ(ierr); 2417 for ( i=0; i<extra_rows; i++ ) aa[nz+i] = 1.0; 2418 2419 /* set "a" and "j" values into matrix */ 2420 nzcount = 0; jcount = 0; 2421 for ( i=0; i<mbs; i++ ) { 2422 nzcountb = nzcount; 2423 nmask = 0; 2424 for ( j=0; j<bs; j++ ) { 2425 kmax = rowlengths[i*bs+j]; 2426 for ( k=0; k<kmax; k++ ) { 2427 tmp = jj[nzcount++]/bs; 2428 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 2429 } 2430 rowcount++; 2431 } 2432 /* sort the masked values */ 2433 PetscSortInt(nmask,masked); 2434 2435 /* set "j" values into matrix */ 2436 maskcount = 1; 2437 for ( j=0; j<nmask; j++ ) { 2438 a->j[jcount++] = masked[j]; 2439 mask[masked[j]] = maskcount++; 2440 } 2441 /* set "a" values into matrix */ 2442 ishift = bs2*a->i[i]; 2443 for ( j=0; j<bs; j++ ) { 2444 kmax = rowlengths[i*bs+j]; 2445 for ( k=0; k<kmax; k++ ) { 2446 tmp = jj[nzcountb]/bs ; 2447 block = mask[tmp] - 1; 2448 point = jj[nzcountb] - bs*tmp; 2449 idx = ishift + bs2*block + j + bs*point; 2450 a->a[idx] = aa[nzcountb++]; 2451 } 2452 } 2453 /* zero out the mask elements we set */ 2454 for ( j=0; j<nmask; j++ ) mask[masked[j]] = 0; 2455 } 2456 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"Bad binary matrix"); 2457 2458 PetscFree(rowlengths); 2459 PetscFree(browlengths); 2460 PetscFree(aa); 2461 PetscFree(jj); 2462 PetscFree(mask); 2463 2464 B->assembled = PETSC_TRUE; 2465 2466 ierr = MatView_Private(B); CHKERRQ(ierr); 2467 PetscFunctionReturn(0); 2468 } 2469 2470 2471 2472