1 /*$Id: sbaij.c,v 1.62 2001/08/07 03:03:01 balay Exp $*/ 2 3 /* 4 Defines the basic matrix operations for the SBAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/vec/vecimpl.h" 9 #include "src/inline/spops.h" 10 #include "src/mat/impls/sbaij/seq/sbaij.h" 11 12 #define CHUNKSIZE 10 13 14 /* 15 Checks for missing diagonals 16 */ 17 #undef __FUNCT__ 18 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 19 int MatMissingDiagonal_SeqSBAIJ(Mat A) 20 { 21 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 22 int *diag,*jj = a->j,i,ierr; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 for (i=0; i<a->mbs; i++) { 28 if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i); 29 } 30 PetscFunctionReturn(0); 31 } 32 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 35 int MatMarkDiagonal_SeqSBAIJ(Mat A) 36 { 37 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 38 int i,mbs = a->mbs,ierr; 39 40 PetscFunctionBegin; 41 if (a->diag) PetscFunctionReturn(0); 42 43 ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr); 44 PetscLogObjectMemory(A,(mbs+1)*sizeof(int)); 45 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 46 PetscFunctionReturn(0); 47 } 48 49 #undef __FUNCT__ 50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 51 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 52 { 53 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 54 int n = a->mbs,i; 55 56 PetscFunctionBegin; 57 *nn = n; 58 if (!ia) PetscFunctionReturn(0); 59 60 if (oshift == 1) { 61 /* temporarily add 1 to i and j indices */ 62 int nz = a->i[n]; 63 for (i=0; i<nz; i++) a->j[i]++; 64 for (i=0; i<n+1; i++) a->i[i]++; 65 *ia = a->i; *ja = a->j; 66 } else { 67 *ia = a->i; *ja = a->j; 68 } 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 74 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 75 { 76 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 77 int i,n = a->mbs; 78 79 PetscFunctionBegin; 80 if (!ia) PetscFunctionReturn(0); 81 82 if (oshift == 1) { 83 int nz = a->i[n]-1; 84 for (i=0; i<nz; i++) a->j[i]--; 85 for (i=0; i<n+1; i++) a->i[i]--; 86 } 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ" 92 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs) 93 { 94 Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data; 95 96 PetscFunctionBegin; 97 *bs = sbaij->bs; 98 PetscFunctionReturn(0); 99 } 100 101 #undef __FUNCT__ 102 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 103 int MatDestroy_SeqSBAIJ(Mat A) 104 { 105 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 106 int ierr; 107 108 PetscFunctionBegin; 109 #if defined(PETSC_USE_LOG) 110 PetscLogObjectState((PetscObject)A,"Rows=%d, NZ=%d",A->m,a->nz); 111 #endif 112 ierr = PetscFree(a->a);CHKERRQ(ierr); 113 if (!a->singlemalloc) { 114 ierr = PetscFree(a->i);CHKERRQ(ierr); 115 ierr = PetscFree(a->j);CHKERRQ(ierr); 116 } 117 if (a->row) { 118 ierr = ISDestroy(a->row);CHKERRQ(ierr); 119 } 120 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 121 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 122 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 123 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 124 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 125 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 126 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 127 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 128 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 129 130 if (a->inew){ 131 ierr = PetscFree(a->inew);CHKERRQ(ierr); 132 a->inew = 0; 133 } 134 ierr = PetscFree(a);CHKERRQ(ierr); 135 PetscFunctionReturn(0); 136 } 137 138 #undef __FUNCT__ 139 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 140 int MatSetOption_SeqSBAIJ(Mat A,MatOption op) 141 { 142 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 143 144 PetscFunctionBegin; 145 switch (op) { 146 case MAT_ROW_ORIENTED: 147 a->roworiented = PETSC_TRUE; 148 break; 149 case MAT_COLUMN_ORIENTED: 150 a->roworiented = PETSC_FALSE; 151 break; 152 case MAT_COLUMNS_SORTED: 153 a->sorted = PETSC_TRUE; 154 break; 155 case MAT_COLUMNS_UNSORTED: 156 a->sorted = PETSC_FALSE; 157 break; 158 case MAT_KEEP_ZEROED_ROWS: 159 a->keepzeroedrows = PETSC_TRUE; 160 break; 161 case MAT_NO_NEW_NONZERO_LOCATIONS: 162 a->nonew = 1; 163 break; 164 case MAT_NEW_NONZERO_LOCATION_ERR: 165 a->nonew = -1; 166 break; 167 case MAT_NEW_NONZERO_ALLOCATION_ERR: 168 a->nonew = -2; 169 break; 170 case MAT_YES_NEW_NONZERO_LOCATIONS: 171 a->nonew = 0; 172 break; 173 case MAT_ROWS_SORTED: 174 case MAT_ROWS_UNSORTED: 175 case MAT_YES_NEW_DIAGONALS: 176 case MAT_IGNORE_OFF_PROC_ENTRIES: 177 case MAT_USE_HASH_TABLE: 178 PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n"); 179 break; 180 case MAT_NO_NEW_DIAGONALS: 181 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 182 case MAT_NOT_SYMMETRIC: 183 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 184 case MAT_HERMITIAN: 185 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 186 case MAT_SYMMETRIC: 187 case MAT_STRUCTURALLY_SYMMETRIC: 188 case MAT_NOT_HERMITIAN: 189 case MAT_SYMMETRY_ETERNAL: 190 case MAT_NOT_SYMMETRY_ETERNAL: 191 break; 192 default: 193 SETERRQ(PETSC_ERR_SUP,"unknown option"); 194 } 195 PetscFunctionReturn(0); 196 } 197 198 #undef __FUNCT__ 199 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 200 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v) 201 { 202 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 203 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr; 204 MatScalar *aa,*aa_i; 205 PetscScalar *v_i; 206 207 PetscFunctionBegin; 208 bs = a->bs; 209 ai = a->i; 210 aj = a->j; 211 aa = a->a; 212 bs2 = a->bs2; 213 214 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %d out of range", row); 215 216 bn = row/bs; /* Block number */ 217 bp = row % bs; /* Block position */ 218 M = ai[bn+1] - ai[bn]; 219 *ncols = bs*M; 220 221 if (v) { 222 *v = 0; 223 if (*ncols) { 224 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 225 for (i=0; i<M; i++) { /* for each block in the block row */ 226 v_i = *v + i*bs; 227 aa_i = aa + bs2*(ai[bn] + i); 228 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 229 } 230 } 231 } 232 233 if (cols) { 234 *cols = 0; 235 if (*ncols) { 236 ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr); 237 for (i=0; i<M; i++) { /* for each block in the block row */ 238 cols_i = *cols + i*bs; 239 itmp = bs*aj[ai[bn] + i]; 240 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 241 } 242 } 243 } 244 245 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 246 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 247 #ifdef column_search 248 v_i = *v + M*bs; 249 cols_i = *cols + M*bs; 250 for (i=0; i<bn; i++){ /* for each block row */ 251 M = ai[i+1] - ai[i]; 252 for (j=0; j<M; j++){ 253 itmp = aj[ai[i] + j]; /* block column value */ 254 if (itmp == bn){ 255 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 256 for (k=0; k<bs; k++) { 257 *cols_i++ = i*bs+k; 258 *v_i++ = aa_i[k]; 259 } 260 *ncols += bs; 261 break; 262 } 263 } 264 } 265 #endif 266 267 PetscFunctionReturn(0); 268 } 269 270 #undef __FUNCT__ 271 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 272 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 273 { 274 int ierr; 275 276 PetscFunctionBegin; 277 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 278 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 279 PetscFunctionReturn(0); 280 } 281 282 #undef __FUNCT__ 283 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 284 int MatTranspose_SeqSBAIJ(Mat A,Mat *B) 285 { 286 int ierr; 287 PetscFunctionBegin; 288 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 289 PetscFunctionReturn(0); 290 } 291 292 #undef __FUNCT__ 293 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 294 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 295 { 296 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 297 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 298 char *name; 299 PetscViewerFormat format; 300 301 PetscFunctionBegin; 302 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 303 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 304 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 305 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 306 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 307 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 308 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 309 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 310 for (i=0; i<a->mbs; i++) { 311 for (j=0; j<bs; j++) { 312 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 313 for (k=a->i[i]; k<a->i[i+1]; k++) { 314 for (l=0; l<bs; l++) { 315 #if defined(PETSC_USE_COMPLEX) 316 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 317 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 318 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 319 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 320 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 321 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 322 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 323 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 324 } 325 #else 326 if (a->a[bs2*k + l*bs + j] != 0.0) { 327 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 328 } 329 #endif 330 } 331 } 332 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 333 } 334 } 335 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 336 } else { 337 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 338 for (i=0; i<a->mbs; i++) { 339 for (j=0; j<bs; j++) { 340 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 341 for (k=a->i[i]; k<a->i[i+1]; k++) { 342 for (l=0; l<bs; l++) { 343 #if defined(PETSC_USE_COMPLEX) 344 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 345 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 346 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 347 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 348 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 349 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 350 } else { 351 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 352 } 353 #else 354 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 355 #endif 356 } 357 } 358 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 359 } 360 } 361 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 362 } 363 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 364 PetscFunctionReturn(0); 365 } 366 367 #undef __FUNCT__ 368 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 369 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 370 { 371 Mat A = (Mat) Aa; 372 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 373 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank; 374 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 375 MatScalar *aa; 376 MPI_Comm comm; 377 PetscViewer viewer; 378 379 PetscFunctionBegin; 380 /* 381 This is nasty. If this is called from an originally parallel matrix 382 then all processes call this,but only the first has the matrix so the 383 rest should return immediately. 384 */ 385 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 386 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 387 if (rank) PetscFunctionReturn(0); 388 389 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 390 391 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 392 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 393 394 /* loop over matrix elements drawing boxes */ 395 color = PETSC_DRAW_BLUE; 396 for (i=0,row=0; i<mbs; i++,row+=bs) { 397 for (j=a->i[i]; j<a->i[i+1]; j++) { 398 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 399 x_l = a->j[j]*bs; x_r = x_l + 1.0; 400 aa = a->a + j*bs2; 401 for (k=0; k<bs; k++) { 402 for (l=0; l<bs; l++) { 403 if (PetscRealPart(*aa++) >= 0.) continue; 404 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 405 } 406 } 407 } 408 } 409 color = PETSC_DRAW_CYAN; 410 for (i=0,row=0; i<mbs; i++,row+=bs) { 411 for (j=a->i[i]; j<a->i[i+1]; j++) { 412 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 413 x_l = a->j[j]*bs; x_r = x_l + 1.0; 414 aa = a->a + j*bs2; 415 for (k=0; k<bs; k++) { 416 for (l=0; l<bs; l++) { 417 if (PetscRealPart(*aa++) != 0.) continue; 418 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 419 } 420 } 421 } 422 } 423 424 color = PETSC_DRAW_RED; 425 for (i=0,row=0; i<mbs; i++,row+=bs) { 426 for (j=a->i[i]; j<a->i[i+1]; j++) { 427 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 428 x_l = a->j[j]*bs; x_r = x_l + 1.0; 429 aa = a->a + j*bs2; 430 for (k=0; k<bs; k++) { 431 for (l=0; l<bs; l++) { 432 if (PetscRealPart(*aa++) <= 0.) continue; 433 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 434 } 435 } 436 } 437 } 438 PetscFunctionReturn(0); 439 } 440 441 #undef __FUNCT__ 442 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 443 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 444 { 445 int ierr; 446 PetscReal xl,yl,xr,yr,w,h; 447 PetscDraw draw; 448 PetscTruth isnull; 449 450 PetscFunctionBegin; 451 452 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 453 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 454 455 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 456 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 457 xr += w; yr += h; xl = -w; yl = -h; 458 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 459 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 460 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 461 PetscFunctionReturn(0); 462 } 463 464 #undef __FUNCT__ 465 #define __FUNCT__ "MatView_SeqSBAIJ" 466 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 467 { 468 int ierr; 469 PetscTruth isascii,isdraw; 470 471 PetscFunctionBegin; 472 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 473 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 474 if (isascii){ 475 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 476 } else if (isdraw) { 477 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 478 } else { 479 Mat B; 480 ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr); 481 ierr = MatView(B,viewer);CHKERRQ(ierr); 482 ierr = MatDestroy(B);CHKERRQ(ierr); 483 } 484 PetscFunctionReturn(0); 485 } 486 487 488 #undef __FUNCT__ 489 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 490 int MatGetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[]) 491 { 492 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 493 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 494 int *ai = a->i,*ailen = a->ilen; 495 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 496 MatScalar *ap,*aa = a->a,zero = 0.0; 497 498 PetscFunctionBegin; 499 for (k=0; k<m; k++) { /* loop over rows */ 500 row = im[k]; brow = row/bs; 501 if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",row); 502 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1); 503 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 504 nrow = ailen[brow]; 505 for (l=0; l<n; l++) { /* loop over columns */ 506 if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",in[l]); 507 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1); 508 col = in[l] ; 509 bcol = col/bs; 510 cidx = col%bs; 511 ridx = row%bs; 512 high = nrow; 513 low = 0; /* assume unsorted */ 514 while (high-low > 5) { 515 t = (low+high)/2; 516 if (rp[t] > bcol) high = t; 517 else low = t; 518 } 519 for (i=low; i<high; i++) { 520 if (rp[i] > bcol) break; 521 if (rp[i] == bcol) { 522 *v++ = ap[bs2*i+bs*cidx+ridx]; 523 goto finished; 524 } 525 } 526 *v++ = zero; 527 finished:; 528 } 529 } 530 PetscFunctionReturn(0); 531 } 532 533 534 #undef __FUNCT__ 535 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 536 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 537 { 538 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 539 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 540 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 541 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 542 PetscTruth roworiented=a->roworiented; 543 const MatScalar *value = v; 544 MatScalar *ap,*aa = a->a,*bap; 545 546 PetscFunctionBegin; 547 if (roworiented) { 548 stepval = (n-1)*bs; 549 } else { 550 stepval = (m-1)*bs; 551 } 552 for (k=0; k<m; k++) { /* loop over added rows */ 553 row = im[k]; 554 if (row < 0) continue; 555 #if defined(PETSC_USE_BOPT_g) 556 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1); 557 #endif 558 rp = aj + ai[row]; 559 ap = aa + bs2*ai[row]; 560 rmax = imax[row]; 561 nrow = ailen[row]; 562 low = 0; 563 for (l=0; l<n; l++) { /* loop over added columns */ 564 if (in[l] < 0) continue; 565 col = in[l]; 566 #if defined(PETSC_USE_BOPT_g) 567 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",col,a->nbs-1); 568 #endif 569 if (col < row) continue; /* ignore lower triangular block */ 570 if (roworiented) { 571 value = v + k*(stepval+bs)*bs + l*bs; 572 } else { 573 value = v + l*(stepval+bs)*bs + k*bs; 574 } 575 if (!sorted) low = 0; high = nrow; 576 while (high-low > 7) { 577 t = (low+high)/2; 578 if (rp[t] > col) high = t; 579 else low = t; 580 } 581 for (i=low; i<high; i++) { 582 if (rp[i] > col) break; 583 if (rp[i] == col) { 584 bap = ap + bs2*i; 585 if (roworiented) { 586 if (is == ADD_VALUES) { 587 for (ii=0; ii<bs; ii++,value+=stepval) { 588 for (jj=ii; jj<bs2; jj+=bs) { 589 bap[jj] += *value++; 590 } 591 } 592 } else { 593 for (ii=0; ii<bs; ii++,value+=stepval) { 594 for (jj=ii; jj<bs2; jj+=bs) { 595 bap[jj] = *value++; 596 } 597 } 598 } 599 } else { 600 if (is == ADD_VALUES) { 601 for (ii=0; ii<bs; ii++,value+=stepval) { 602 for (jj=0; jj<bs; jj++) { 603 *bap++ += *value++; 604 } 605 } 606 } else { 607 for (ii=0; ii<bs; ii++,value+=stepval) { 608 for (jj=0; jj<bs; jj++) { 609 *bap++ = *value++; 610 } 611 } 612 } 613 } 614 goto noinsert2; 615 } 616 } 617 if (nonew == 1) goto noinsert2; 618 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 619 if (nrow >= rmax) { 620 /* there is no extra room in row, therefore enlarge */ 621 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 622 MatScalar *new_a; 623 624 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 625 626 /* malloc new storage space */ 627 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 628 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 629 new_j = (int*)(new_a + bs2*new_nz); 630 new_i = new_j + new_nz; 631 632 /* copy over old data into new slots */ 633 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 634 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 635 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 636 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 637 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 638 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 639 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 640 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 641 /* free up old matrix storage */ 642 ierr = PetscFree(a->a);CHKERRQ(ierr); 643 if (!a->singlemalloc) { 644 ierr = PetscFree(a->i);CHKERRQ(ierr); 645 ierr = PetscFree(a->j);CHKERRQ(ierr); 646 } 647 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 648 a->singlemalloc = PETSC_TRUE; 649 650 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 651 rmax = imax[row] = imax[row] + CHUNKSIZE; 652 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 653 a->maxnz += bs2*CHUNKSIZE; 654 a->reallocs++; 655 a->nz++; 656 } 657 N = nrow++ - 1; 658 /* shift up all the later entries in this row */ 659 for (ii=N; ii>=i; ii--) { 660 rp[ii+1] = rp[ii]; 661 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 662 } 663 if (N >= i) { 664 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 665 } 666 rp[i] = col; 667 bap = ap + bs2*i; 668 if (roworiented) { 669 for (ii=0; ii<bs; ii++,value+=stepval) { 670 for (jj=ii; jj<bs2; jj+=bs) { 671 bap[jj] = *value++; 672 } 673 } 674 } else { 675 for (ii=0; ii<bs; ii++,value+=stepval) { 676 for (jj=0; jj<bs; jj++) { 677 *bap++ = *value++; 678 } 679 } 680 } 681 noinsert2:; 682 low = i; 683 } 684 ailen[row] = nrow; 685 } 686 PetscFunctionReturn(0); 687 } 688 689 #undef __FUNCT__ 690 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 691 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 692 { 693 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 694 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 695 int m = A->m,*ip,N,*ailen = a->ilen; 696 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 697 MatScalar *aa = a->a,*ap; 698 699 PetscFunctionBegin; 700 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 701 702 if (m) rmax = ailen[0]; 703 for (i=1; i<mbs; i++) { 704 /* move each row back by the amount of empty slots (fshift) before it*/ 705 fshift += imax[i-1] - ailen[i-1]; 706 rmax = PetscMax(rmax,ailen[i]); 707 if (fshift) { 708 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 709 N = ailen[i]; 710 for (j=0; j<N; j++) { 711 ip[j-fshift] = ip[j]; 712 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 713 } 714 } 715 ai[i] = ai[i-1] + ailen[i-1]; 716 } 717 if (mbs) { 718 fshift += imax[mbs-1] - ailen[mbs-1]; 719 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 720 } 721 /* reset ilen and imax for each row */ 722 for (i=0; i<mbs; i++) { 723 ailen[i] = imax[i] = ai[i+1] - ai[i]; 724 } 725 a->nz = ai[mbs]; 726 727 /* diagonals may have moved, reset it */ 728 if (a->diag) { 729 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr); 730 } 731 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 732 m,A->m,a->bs,fshift*bs2,a->nz*bs2); 733 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n", 734 a->reallocs); 735 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 736 a->reallocs = 0; 737 A->info.nz_unneeded = (PetscReal)fshift*bs2; 738 739 PetscFunctionReturn(0); 740 } 741 742 /* 743 This function returns an array of flags which indicate the locations of contiguous 744 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 745 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 746 Assume: sizes should be long enough to hold all the values. 747 */ 748 #undef __FUNCT__ 749 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 750 int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 751 { 752 int i,j,k,row; 753 PetscTruth flg; 754 755 PetscFunctionBegin; 756 for (i=0,j=0; i<n; j++) { 757 row = idx[i]; 758 if (row%bs!=0) { /* Not the begining of a block */ 759 sizes[j] = 1; 760 i++; 761 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 762 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 763 i++; 764 } else { /* Begining of the block, so check if the complete block exists */ 765 flg = PETSC_TRUE; 766 for (k=1; k<bs; k++) { 767 if (row+k != idx[i+k]) { /* break in the block */ 768 flg = PETSC_FALSE; 769 break; 770 } 771 } 772 if (flg == PETSC_TRUE) { /* No break in the bs */ 773 sizes[j] = bs; 774 i+= bs; 775 } else { 776 sizes[j] = 1; 777 i++; 778 } 779 } 780 } 781 *bs_max = j; 782 PetscFunctionReturn(0); 783 } 784 785 #undef __FUNCT__ 786 #define __FUNCT__ "MatZeroRows_SeqSBAIJ" 787 int MatZeroRows_SeqSBAIJ(Mat A,IS is,const PetscScalar *diag) 788 { 789 PetscFunctionBegin; 790 SETERRQ(PETSC_ERR_SUP,"No support for this function yet"); 791 } 792 793 /* Only add/insert a(i,j) with i<=j (blocks). 794 Any a(i,j) with i>j input by user is ingored. 795 */ 796 797 #undef __FUNCT__ 798 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 799 int MatSetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 800 { 801 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 802 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 803 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 804 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 805 int ridx,cidx,bs2=a->bs2,ierr; 806 MatScalar *ap,value,*aa=a->a,*bap; 807 808 PetscFunctionBegin; 809 810 for (k=0; k<m; k++) { /* loop over added rows */ 811 row = im[k]; /* row number */ 812 brow = row/bs; /* block row number */ 813 if (row < 0) continue; 814 #if defined(PETSC_USE_BOPT_g) 815 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1); 816 #endif 817 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 818 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 819 rmax = imax[brow]; /* maximum space allocated for this row */ 820 nrow = ailen[brow]; /* actual length of this row */ 821 low = 0; 822 823 for (l=0; l<n; l++) { /* loop over added columns */ 824 if (in[l] < 0) continue; 825 #if defined(PETSC_USE_BOPT_g) 826 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m-1); 827 #endif 828 col = in[l]; 829 bcol = col/bs; /* block col number */ 830 831 if (brow <= bcol){ 832 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 833 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 834 /* element value a(k,l) */ 835 if (roworiented) { 836 value = v[l + k*n]; 837 } else { 838 value = v[k + l*m]; 839 } 840 841 /* move pointer bap to a(k,l) quickly and add/insert value */ 842 if (!sorted) low = 0; high = nrow; 843 while (high-low > 7) { 844 t = (low+high)/2; 845 if (rp[t] > bcol) high = t; 846 else low = t; 847 } 848 for (i=low; i<high; i++) { 849 /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */ 850 if (rp[i] > bcol) break; 851 if (rp[i] == bcol) { 852 bap = ap + bs2*i + bs*cidx + ridx; 853 if (is == ADD_VALUES) *bap += value; 854 else *bap = value; 855 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 856 if (brow == bcol && ridx < cidx){ 857 bap = ap + bs2*i + bs*ridx + cidx; 858 if (is == ADD_VALUES) *bap += value; 859 else *bap = value; 860 } 861 goto noinsert1; 862 } 863 } 864 865 if (nonew == 1) goto noinsert1; 866 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 867 if (nrow >= rmax) { 868 /* there is no extra room in row, therefore enlarge */ 869 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 870 MatScalar *new_a; 871 872 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 873 874 /* Malloc new storage space */ 875 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 876 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 877 new_j = (int*)(new_a + bs2*new_nz); 878 new_i = new_j + new_nz; 879 880 /* copy over old data into new slots */ 881 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 882 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 883 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 884 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 885 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 886 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 887 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 888 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 889 /* free up old matrix storage */ 890 ierr = PetscFree(a->a);CHKERRQ(ierr); 891 if (!a->singlemalloc) { 892 ierr = PetscFree(a->i);CHKERRQ(ierr); 893 ierr = PetscFree(a->j);CHKERRQ(ierr); 894 } 895 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 896 a->singlemalloc = PETSC_TRUE; 897 898 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 899 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 900 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 901 a->maxnz += bs2*CHUNKSIZE; 902 a->reallocs++; 903 a->nz++; 904 } 905 906 N = nrow++ - 1; 907 /* shift up all the later entries in this row */ 908 for (ii=N; ii>=i; ii--) { 909 rp[ii+1] = rp[ii]; 910 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 911 } 912 if (N>=i) { 913 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 914 } 915 rp[i] = bcol; 916 ap[bs2*i + bs*cidx + ridx] = value; 917 noinsert1:; 918 low = i; 919 /* } */ 920 } 921 } /* end of if .. if.. */ 922 } /* end of loop over added columns */ 923 ailen[brow] = nrow; 924 } /* end of loop over added rows */ 925 926 PetscFunctionReturn(0); 927 } 928 929 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*); 930 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*); 931 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS[],int); 932 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 933 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat*[]); 934 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec); 935 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec); 936 extern int MatScale_SeqSBAIJ(const PetscScalar*,Mat); 937 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 938 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 939 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec); 940 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 941 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 942 extern int MatZeroEntries_SeqSBAIJ(Mat); 943 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec); 944 945 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 946 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 947 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 948 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 949 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 950 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 951 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 952 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 953 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec); 954 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec); 955 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec); 956 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec); 957 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec); 958 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec); 959 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec); 960 961 extern int MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs); 962 963 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 964 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 965 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 966 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 967 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 968 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 969 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 970 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 971 972 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*); 973 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*); 974 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*); 975 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*); 976 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*); 977 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*); 978 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*); 979 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*); 980 extern int MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*); 981 982 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 983 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 984 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 985 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 986 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 987 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 988 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 989 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 990 991 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 992 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 993 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 994 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 995 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 996 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 997 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 998 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 999 1000 #ifdef HAVE_MatICCFactor 1001 /* modefied from MatILUFactor_SeqSBAIJ, needs further work! */ 1002 #undef __FUNCT__ 1003 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 1004 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 1005 { 1006 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 1007 Mat outA; 1008 int ierr; 1009 PetscTruth row_identity,col_identity; 1010 1011 PetscFunctionBegin; 1012 /* 1013 if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1014 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1015 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1016 if (!row_identity || !col_identity) { 1017 SETERRQ(1,"Row and column permutations must be identity for in-place ICC"); 1018 } 1019 */ 1020 1021 outA = inA; 1022 inA->factor = FACTOR_CHOLESKY; 1023 1024 if (!a->diag) { 1025 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 1026 } 1027 /* 1028 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1029 for ILU(0) factorization with natural ordering 1030 */ 1031 switch (a->bs) { 1032 case 1: 1033 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1034 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1035 inA->ops->solves = MatSolves_SeqSBAIJ_1; 1036 PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1037 case 2: 1038 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1039 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1040 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1041 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1042 break; 1043 case 3: 1044 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1045 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1046 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1047 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1048 break; 1049 case 4: 1050 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1051 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1052 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1053 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1054 break; 1055 case 5: 1056 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1057 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1058 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1059 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1060 break; 1061 case 6: 1062 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1063 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1064 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1065 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1066 break; 1067 case 7: 1068 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1069 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1070 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1071 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1072 break; 1073 default: 1074 a->row = row; 1075 a->icol = col; 1076 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1077 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1078 1079 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1080 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1081 PetscLogObjectParent(inA,a->icol); 1082 1083 if (!a->solve_work) { 1084 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1085 PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar)); 1086 } 1087 } 1088 1089 ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr); 1090 1091 PetscFunctionReturn(0); 1092 } 1093 #endif 1094 1095 #undef __FUNCT__ 1096 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1097 int MatPrintHelp_SeqSBAIJ(Mat A) 1098 { 1099 static PetscTruth called = PETSC_FALSE; 1100 MPI_Comm comm = A->comm; 1101 int ierr; 1102 1103 PetscFunctionBegin; 1104 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1105 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1106 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1107 PetscFunctionReturn(0); 1108 } 1109 1110 EXTERN_C_BEGIN 1111 #undef __FUNCT__ 1112 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1113 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices) 1114 { 1115 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1116 int i,nz,n; 1117 1118 PetscFunctionBegin; 1119 nz = baij->maxnz; 1120 n = mat->n; 1121 for (i=0; i<nz; i++) { 1122 baij->j[i] = indices[i]; 1123 } 1124 baij->nz = nz; 1125 for (i=0; i<n; i++) { 1126 baij->ilen[i] = baij->imax[i]; 1127 } 1128 1129 PetscFunctionReturn(0); 1130 } 1131 EXTERN_C_END 1132 1133 #undef __FUNCT__ 1134 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1135 /*@ 1136 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1137 in the matrix. 1138 1139 Input Parameters: 1140 + mat - the SeqSBAIJ matrix 1141 - indices - the column indices 1142 1143 Level: advanced 1144 1145 Notes: 1146 This can be called if you have precomputed the nonzero structure of the 1147 matrix and want to provide it to the matrix object to improve the performance 1148 of the MatSetValues() operation. 1149 1150 You MUST have set the correct numbers of nonzeros per row in the call to 1151 MatCreateSeqSBAIJ(). 1152 1153 MUST be called before any calls to MatSetValues() 1154 1155 .seealso: MatCreateSeqSBAIJ 1156 @*/ 1157 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices) 1158 { 1159 int ierr,(*f)(Mat,int *); 1160 1161 PetscFunctionBegin; 1162 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1163 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1164 if (f) { 1165 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1166 } else { 1167 SETERRQ(1,"Wrong type of matrix to set column indices"); 1168 } 1169 PetscFunctionReturn(0); 1170 } 1171 1172 #undef __FUNCT__ 1173 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1174 int MatSetUpPreallocation_SeqSBAIJ(Mat A) 1175 { 1176 int ierr; 1177 1178 PetscFunctionBegin; 1179 ierr = MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1180 PetscFunctionReturn(0); 1181 } 1182 1183 #undef __FUNCT__ 1184 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1185 int MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1186 { 1187 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1188 PetscFunctionBegin; 1189 *array = a->a; 1190 PetscFunctionReturn(0); 1191 } 1192 1193 #undef __FUNCT__ 1194 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1195 int MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1196 { 1197 PetscFunctionBegin; 1198 PetscFunctionReturn(0); 1199 } 1200 1201 #include "petscblaslapack.h" 1202 #undef __FUNCT__ 1203 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1204 int MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1205 { 1206 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1207 int ierr,one=1,i,bs=y->bs,bs2,j; 1208 1209 PetscFunctionBegin; 1210 if (str == SAME_NONZERO_PATTERN) { 1211 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1212 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1213 if (y->xtoy && y->XtoY != X) { 1214 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1215 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1216 } 1217 if (!y->xtoy) { /* get xtoy */ 1218 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1219 y->XtoY = X; 1220 } 1221 bs2 = bs*bs; 1222 for (i=0; i<x->nz; i++) { 1223 j = 0; 1224 while (j < bs2){ 1225 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1226 j++; 1227 } 1228 } 1229 PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)); 1230 } else { 1231 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1232 } 1233 PetscFunctionReturn(0); 1234 } 1235 1236 #undef __FUNCT__ 1237 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1238 int MatIsSymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1239 { 1240 PetscFunctionBegin; 1241 *flg = PETSC_TRUE; 1242 PetscFunctionReturn(0); 1243 } 1244 1245 #undef __FUNCT__ 1246 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1247 int MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1248 { 1249 PetscFunctionBegin; 1250 *flg = PETSC_TRUE; 1251 PetscFunctionReturn(0); 1252 } 1253 1254 #undef __FUNCT__ 1255 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1256 int MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg) 1257 { 1258 PetscFunctionBegin; 1259 *flg = PETSC_FALSE; 1260 PetscFunctionReturn(0); 1261 } 1262 1263 /* -------------------------------------------------------------------*/ 1264 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1265 MatGetRow_SeqSBAIJ, 1266 MatRestoreRow_SeqSBAIJ, 1267 MatMult_SeqSBAIJ_N, 1268 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1269 MatMultTranspose_SeqSBAIJ, 1270 MatMultTransposeAdd_SeqSBAIJ, 1271 MatSolve_SeqSBAIJ_N, 1272 0, 1273 0, 1274 /*10*/ 0, 1275 0, 1276 MatCholeskyFactor_SeqSBAIJ, 1277 MatRelax_SeqSBAIJ, 1278 MatTranspose_SeqSBAIJ, 1279 /*15*/ MatGetInfo_SeqSBAIJ, 1280 MatEqual_SeqSBAIJ, 1281 MatGetDiagonal_SeqSBAIJ, 1282 MatDiagonalScale_SeqSBAIJ, 1283 MatNorm_SeqSBAIJ, 1284 /*20*/ 0, 1285 MatAssemblyEnd_SeqSBAIJ, 1286 0, 1287 MatSetOption_SeqSBAIJ, 1288 MatZeroEntries_SeqSBAIJ, 1289 /*25*/ MatZeroRows_SeqSBAIJ, 1290 0, 1291 0, 1292 MatCholeskyFactorSymbolic_SeqSBAIJ, 1293 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1294 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1295 0, 1296 MatICCFactorSymbolic_SeqSBAIJ, 1297 MatGetArray_SeqSBAIJ, 1298 MatRestoreArray_SeqSBAIJ, 1299 /*35*/ MatDuplicate_SeqSBAIJ, 1300 0, 1301 0, 1302 0, 1303 0, 1304 /*40*/ MatAXPY_SeqSBAIJ, 1305 MatGetSubMatrices_SeqSBAIJ, 1306 MatIncreaseOverlap_SeqSBAIJ, 1307 MatGetValues_SeqSBAIJ, 1308 0, 1309 /*45*/ MatPrintHelp_SeqSBAIJ, 1310 MatScale_SeqSBAIJ, 1311 0, 1312 0, 1313 0, 1314 /*50*/ MatGetBlockSize_SeqSBAIJ, 1315 MatGetRowIJ_SeqSBAIJ, 1316 MatRestoreRowIJ_SeqSBAIJ, 1317 0, 1318 0, 1319 /*55*/ 0, 1320 0, 1321 0, 1322 0, 1323 MatSetValuesBlocked_SeqSBAIJ, 1324 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1325 0, 1326 0, 1327 MatGetPetscMaps_Petsc, 1328 0, 1329 /*65*/ 0, 1330 0, 1331 0, 1332 0, 1333 0, 1334 /*70*/ MatGetRowMax_SeqSBAIJ, 1335 0, 1336 0, 1337 0, 1338 0, 1339 /*75*/ 0, 1340 0, 1341 0, 1342 0, 1343 0, 1344 /*80*/ 0, 1345 0, 1346 0, 1347 #if !defined(PETSC_USE_COMPLEX) 1348 MatGetInertia_SeqSBAIJ, 1349 #else 1350 0, 1351 #endif 1352 /*85*/ MatLoad_SeqSBAIJ, 1353 MatIsSymmetric_SeqSBAIJ, 1354 MatIsStructurallySymmetric_SeqSBAIJ, 1355 MatIsHermitian_SeqSBAIJ 1356 }; 1357 1358 EXTERN_C_BEGIN 1359 #undef __FUNCT__ 1360 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1361 int MatStoreValues_SeqSBAIJ(Mat mat) 1362 { 1363 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1364 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1365 int ierr; 1366 1367 PetscFunctionBegin; 1368 if (aij->nonew != 1) { 1369 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1370 } 1371 1372 /* allocate space for values if not already there */ 1373 if (!aij->saved_values) { 1374 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1375 } 1376 1377 /* copy values over */ 1378 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1379 PetscFunctionReturn(0); 1380 } 1381 EXTERN_C_END 1382 1383 EXTERN_C_BEGIN 1384 #undef __FUNCT__ 1385 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1386 int MatRetrieveValues_SeqSBAIJ(Mat mat) 1387 { 1388 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1389 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1390 1391 PetscFunctionBegin; 1392 if (aij->nonew != 1) { 1393 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1394 } 1395 if (!aij->saved_values) { 1396 SETERRQ(1,"Must call MatStoreValues(A);first"); 1397 } 1398 1399 /* copy values over */ 1400 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1401 PetscFunctionReturn(0); 1402 } 1403 EXTERN_C_END 1404 1405 EXTERN_C_BEGIN 1406 #undef __FUNCT__ 1407 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1408 int MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,int bs,int nz,int *nnz) 1409 { 1410 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1411 int i,len,ierr,mbs,bs2; 1412 PetscTruth flg; 1413 1414 PetscFunctionBegin; 1415 B->preallocated = PETSC_TRUE; 1416 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1417 mbs = B->m/bs; 1418 bs2 = bs*bs; 1419 1420 if (mbs*bs != B->m) { 1421 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1422 } 1423 1424 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1425 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1426 if (nnz) { 1427 for (i=0; i<mbs; i++) { 1428 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1429 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs); 1430 } 1431 } 1432 1433 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1434 if (!flg) { 1435 switch (bs) { 1436 case 1: 1437 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1438 B->ops->solve = MatSolve_SeqSBAIJ_1; 1439 B->ops->solves = MatSolves_SeqSBAIJ_1; 1440 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_1; 1441 B->ops->mult = MatMult_SeqSBAIJ_1; 1442 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1443 break; 1444 case 2: 1445 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1446 B->ops->solve = MatSolve_SeqSBAIJ_2; 1447 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_2; 1448 B->ops->mult = MatMult_SeqSBAIJ_2; 1449 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1450 break; 1451 case 3: 1452 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1453 B->ops->solve = MatSolve_SeqSBAIJ_3; 1454 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_3; 1455 B->ops->mult = MatMult_SeqSBAIJ_3; 1456 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1457 break; 1458 case 4: 1459 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1460 B->ops->solve = MatSolve_SeqSBAIJ_4; 1461 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_4; 1462 B->ops->mult = MatMult_SeqSBAIJ_4; 1463 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1464 break; 1465 case 5: 1466 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1467 B->ops->solve = MatSolve_SeqSBAIJ_5; 1468 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_5; 1469 B->ops->mult = MatMult_SeqSBAIJ_5; 1470 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1471 break; 1472 case 6: 1473 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1474 B->ops->solve = MatSolve_SeqSBAIJ_6; 1475 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_6; 1476 B->ops->mult = MatMult_SeqSBAIJ_6; 1477 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1478 break; 1479 case 7: 1480 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1481 B->ops->solve = MatSolve_SeqSBAIJ_7; 1482 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_7; 1483 B->ops->mult = MatMult_SeqSBAIJ_7; 1484 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1485 break; 1486 } 1487 } 1488 1489 b->mbs = mbs; 1490 b->nbs = mbs; 1491 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1492 if (!nnz) { 1493 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1494 else if (nz <= 0) nz = 1; 1495 for (i=0; i<mbs; i++) { 1496 b->imax[i] = nz; 1497 } 1498 nz = nz*mbs; /* total nz */ 1499 } else { 1500 nz = 0; 1501 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1502 } 1503 /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1504 1505 /* allocate the matrix space */ 1506 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1507 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1508 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1509 b->j = (int*)(b->a + nz*bs2); 1510 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 1511 b->i = b->j + nz; 1512 b->singlemalloc = PETSC_TRUE; 1513 1514 /* pointer to beginning of each row */ 1515 b->i[0] = 0; 1516 for (i=1; i<mbs+1; i++) { 1517 b->i[i] = b->i[i-1] + b->imax[i-1]; 1518 } 1519 1520 /* b->ilen will count nonzeros in each block row so far. */ 1521 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1522 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1523 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1524 1525 b->bs = bs; 1526 b->bs2 = bs2; 1527 b->nz = 0; 1528 b->maxnz = nz*bs2; 1529 1530 b->inew = 0; 1531 b->jnew = 0; 1532 b->anew = 0; 1533 b->a2anew = 0; 1534 b->permute = PETSC_FALSE; 1535 PetscFunctionReturn(0); 1536 } 1537 EXTERN_C_END 1538 1539 /*MC 1540 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1541 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1542 1543 Options Database Keys: 1544 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1545 1546 Level: beginner 1547 1548 .seealso: MatCreateSeqSBAIJ 1549 M*/ 1550 1551 EXTERN_C_BEGIN 1552 #undef __FUNCT__ 1553 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1554 int MatCreate_SeqSBAIJ(Mat B) 1555 { 1556 Mat_SeqSBAIJ *b; 1557 int ierr,size; 1558 1559 PetscFunctionBegin; 1560 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1561 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1562 B->m = B->M = PetscMax(B->m,B->M); 1563 B->n = B->N = PetscMax(B->n,B->N); 1564 1565 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1566 B->data = (void*)b; 1567 ierr = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1568 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1569 B->ops->destroy = MatDestroy_SeqSBAIJ; 1570 B->ops->view = MatView_SeqSBAIJ; 1571 B->factor = 0; 1572 B->lupivotthreshold = 1.0; 1573 B->mapping = 0; 1574 b->row = 0; 1575 b->icol = 0; 1576 b->reallocs = 0; 1577 b->saved_values = 0; 1578 1579 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1580 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1581 1582 b->sorted = PETSC_FALSE; 1583 b->roworiented = PETSC_TRUE; 1584 b->nonew = 0; 1585 b->diag = 0; 1586 b->solve_work = 0; 1587 b->mult_work = 0; 1588 B->spptr = 0; 1589 b->keepzeroedrows = PETSC_FALSE; 1590 b->xtoy = 0; 1591 b->XtoY = 0; 1592 1593 b->inew = 0; 1594 b->jnew = 0; 1595 b->anew = 0; 1596 b->a2anew = 0; 1597 b->permute = PETSC_FALSE; 1598 1599 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1600 "MatStoreValues_SeqSBAIJ", 1601 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1602 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1603 "MatRetrieveValues_SeqSBAIJ", 1604 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1605 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1606 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1607 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1608 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C", 1609 "MatConvert_SeqSBAIJ_SeqAIJ", 1610 MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr); 1611 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1612 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1613 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1614 PetscFunctionReturn(0); 1615 } 1616 EXTERN_C_END 1617 1618 #undef __FUNCT__ 1619 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1620 /*@C 1621 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1622 compressed row) format. For good matrix assembly performance the 1623 user should preallocate the matrix storage by setting the parameter nz 1624 (or the array nnz). By setting these parameters accurately, performance 1625 during matrix assembly can be increased by more than a factor of 50. 1626 1627 Collective on Mat 1628 1629 Input Parameters: 1630 + A - the symmetric matrix 1631 . bs - size of block 1632 . nz - number of block nonzeros per block row (same for all rows) 1633 - nnz - array containing the number of block nonzeros in the upper triangular plus 1634 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1635 1636 Options Database Keys: 1637 . -mat_no_unroll - uses code that does not unroll the loops in the 1638 block calculations (much slower) 1639 . -mat_block_size - size of the blocks to use 1640 1641 Level: intermediate 1642 1643 Notes: 1644 Specify the preallocated storage with either nz or nnz (not both). 1645 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1646 allocation. For additional details, see the users manual chapter on 1647 matrices. 1648 1649 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1650 @*/ 1651 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) { 1652 int ierr,(*f)(Mat,int,int,const int[]); 1653 1654 PetscFunctionBegin; 1655 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1656 if (f) { 1657 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1658 } 1659 PetscFunctionReturn(0); 1660 } 1661 1662 #undef __FUNCT__ 1663 #define __FUNCT__ "MatCreateSeqSBAIJ" 1664 /*@C 1665 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1666 compressed row) format. For good matrix assembly performance the 1667 user should preallocate the matrix storage by setting the parameter nz 1668 (or the array nnz). By setting these parameters accurately, performance 1669 during matrix assembly can be increased by more than a factor of 50. 1670 1671 Collective on MPI_Comm 1672 1673 Input Parameters: 1674 + comm - MPI communicator, set to PETSC_COMM_SELF 1675 . bs - size of block 1676 . m - number of rows, or number of columns 1677 . nz - number of block nonzeros per block row (same for all rows) 1678 - nnz - array containing the number of block nonzeros in the upper triangular plus 1679 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1680 1681 Output Parameter: 1682 . A - the symmetric matrix 1683 1684 Options Database Keys: 1685 . -mat_no_unroll - uses code that does not unroll the loops in the 1686 block calculations (much slower) 1687 . -mat_block_size - size of the blocks to use 1688 1689 Level: intermediate 1690 1691 Notes: 1692 1693 Specify the preallocated storage with either nz or nnz (not both). 1694 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1695 allocation. For additional details, see the users manual chapter on 1696 matrices. 1697 1698 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1699 @*/ 1700 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A) 1701 { 1702 int ierr; 1703 1704 PetscFunctionBegin; 1705 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1706 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1707 ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1708 PetscFunctionReturn(0); 1709 } 1710 1711 #undef __FUNCT__ 1712 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1713 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1714 { 1715 Mat C; 1716 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1717 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1718 1719 PetscFunctionBegin; 1720 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1721 1722 *B = 0; 1723 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1724 ierr = MatSetType(C,MATSEQSBAIJ);CHKERRQ(ierr); 1725 c = (Mat_SeqSBAIJ*)C->data; 1726 1727 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1728 C->preallocated = PETSC_TRUE; 1729 C->factor = A->factor; 1730 c->row = 0; 1731 c->icol = 0; 1732 c->saved_values = 0; 1733 c->keepzeroedrows = a->keepzeroedrows; 1734 C->assembled = PETSC_TRUE; 1735 1736 c->bs = a->bs; 1737 c->bs2 = a->bs2; 1738 c->mbs = a->mbs; 1739 c->nbs = a->nbs; 1740 1741 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1742 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1743 for (i=0; i<mbs; i++) { 1744 c->imax[i] = a->imax[i]; 1745 c->ilen[i] = a->ilen[i]; 1746 } 1747 1748 /* allocate the matrix space */ 1749 c->singlemalloc = PETSC_TRUE; 1750 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1751 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1752 c->j = (int*)(c->a + nz*bs2); 1753 c->i = c->j + nz; 1754 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1755 if (mbs > 0) { 1756 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1757 if (cpvalues == MAT_COPY_VALUES) { 1758 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1759 } else { 1760 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1761 } 1762 } 1763 1764 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1765 c->sorted = a->sorted; 1766 c->roworiented = a->roworiented; 1767 c->nonew = a->nonew; 1768 1769 if (a->diag) { 1770 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1771 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1772 for (i=0; i<mbs; i++) { 1773 c->diag[i] = a->diag[i]; 1774 } 1775 } else c->diag = 0; 1776 c->nz = a->nz; 1777 c->maxnz = a->maxnz; 1778 c->solve_work = 0; 1779 C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1780 c->mult_work = 0; 1781 *B = C; 1782 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1783 PetscFunctionReturn(0); 1784 } 1785 1786 #undef __FUNCT__ 1787 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1788 int MatLoad_SeqSBAIJ(PetscViewer viewer,const MatType type,Mat *A) 1789 { 1790 Mat_SeqSBAIJ *a; 1791 Mat B; 1792 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1793 int *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1794 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1795 int *masked,nmask,tmp,bs2,ishift; 1796 PetscScalar *aa; 1797 MPI_Comm comm = ((PetscObject)viewer)->comm; 1798 1799 PetscFunctionBegin; 1800 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1801 bs2 = bs*bs; 1802 1803 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1804 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1805 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1806 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1807 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1808 M = header[1]; N = header[2]; nz = header[3]; 1809 1810 if (header[3] < 0) { 1811 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1812 } 1813 1814 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1815 1816 /* 1817 This code adds extra rows to make sure the number of rows is 1818 divisible by the blocksize 1819 */ 1820 mbs = M/bs; 1821 extra_rows = bs - M + bs*(mbs); 1822 if (extra_rows == bs) extra_rows = 0; 1823 else mbs++; 1824 if (extra_rows) { 1825 PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"); 1826 } 1827 1828 /* read in row lengths */ 1829 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1830 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1831 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1832 1833 /* read in column indices */ 1834 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1835 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1836 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1837 1838 /* loop over row lengths determining block row lengths */ 1839 ierr = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr); 1840 ierr = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1841 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1842 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1843 masked = mask + mbs; 1844 rowcount = 0; nzcount = 0; 1845 for (i=0; i<mbs; i++) { 1846 nmask = 0; 1847 for (j=0; j<bs; j++) { 1848 kmax = rowlengths[rowcount]; 1849 for (k=0; k<kmax; k++) { 1850 tmp = jj[nzcount++]/bs; /* block col. index */ 1851 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1852 } 1853 rowcount++; 1854 } 1855 s_browlengths[i] += nmask; 1856 1857 /* zero out the mask elements we set */ 1858 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1859 } 1860 1861 /* create our matrix */ 1862 ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr); 1863 ierr = MatSetType(B,type);CHKERRQ(ierr); 1864 ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr); 1865 a = (Mat_SeqSBAIJ*)B->data; 1866 1867 /* set matrix "i" values */ 1868 a->i[0] = 0; 1869 for (i=1; i<= mbs; i++) { 1870 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1871 a->ilen[i-1] = s_browlengths[i-1]; 1872 } 1873 a->nz = a->i[mbs]; 1874 1875 /* read in nonzero values */ 1876 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1877 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1878 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1879 1880 /* set "a" and "j" values into matrix */ 1881 nzcount = 0; jcount = 0; 1882 for (i=0; i<mbs; i++) { 1883 nzcountb = nzcount; 1884 nmask = 0; 1885 for (j=0; j<bs; j++) { 1886 kmax = rowlengths[i*bs+j]; 1887 for (k=0; k<kmax; k++) { 1888 tmp = jj[nzcount++]/bs; /* block col. index */ 1889 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1890 } 1891 } 1892 /* sort the masked values */ 1893 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1894 1895 /* set "j" values into matrix */ 1896 maskcount = 1; 1897 for (j=0; j<nmask; j++) { 1898 a->j[jcount++] = masked[j]; 1899 mask[masked[j]] = maskcount++; 1900 } 1901 1902 /* set "a" values into matrix */ 1903 ishift = bs2*a->i[i]; 1904 for (j=0; j<bs; j++) { 1905 kmax = rowlengths[i*bs+j]; 1906 for (k=0; k<kmax; k++) { 1907 tmp = jj[nzcountb]/bs ; /* block col. index */ 1908 if (tmp >= i){ 1909 block = mask[tmp] - 1; 1910 point = jj[nzcountb] - bs*tmp; 1911 idx = ishift + bs2*block + j + bs*point; 1912 a->a[idx] = aa[nzcountb]; 1913 } 1914 nzcountb++; 1915 } 1916 } 1917 /* zero out the mask elements we set */ 1918 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1919 } 1920 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1921 1922 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1923 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1924 ierr = PetscFree(aa);CHKERRQ(ierr); 1925 ierr = PetscFree(jj);CHKERRQ(ierr); 1926 ierr = PetscFree(mask);CHKERRQ(ierr); 1927 1928 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1929 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1930 ierr = MatView_Private(B);CHKERRQ(ierr); 1931 *A = B; 1932 PetscFunctionReturn(0); 1933 } 1934 1935 #undef __FUNCT__ 1936 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1937 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1938 { 1939 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1940 MatScalar *aa=a->a,*v,*v1; 1941 PetscScalar *x,*b,*t,sum,d; 1942 int m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr; 1943 int nz,nz1,*vj,*vj1,i; 1944 1945 PetscFunctionBegin; 1946 its = its*lits; 1947 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1948 1949 if (bs > 1) 1950 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1951 1952 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1953 if (xx != bb) { 1954 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1955 } else { 1956 b = x; 1957 } 1958 1959 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1960 1961 if (flag & SOR_ZERO_INITIAL_GUESS) { 1962 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1963 for (i=0; i<m; i++) 1964 t[i] = b[i]; 1965 1966 for (i=0; i<m; i++){ 1967 d = *(aa + ai[i]); /* diag[i] */ 1968 v = aa + ai[i] + 1; 1969 vj = aj + ai[i] + 1; 1970 nz = ai[i+1] - ai[i] - 1; 1971 x[i] = omega*t[i]/d; 1972 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 1973 PetscLogFlops(2*nz-1); 1974 } 1975 } 1976 1977 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1978 for (i=0; i<m; i++) 1979 t[i] = b[i]; 1980 1981 for (i=0; i<m-1; i++){ /* update rhs */ 1982 v = aa + ai[i] + 1; 1983 vj = aj + ai[i] + 1; 1984 nz = ai[i+1] - ai[i] - 1; 1985 while (nz--) t[*vj++] -= x[i]*(*v++); 1986 PetscLogFlops(2*nz-1); 1987 } 1988 for (i=m-1; i>=0; i--){ 1989 d = *(aa + ai[i]); 1990 v = aa + ai[i] + 1; 1991 vj = aj + ai[i] + 1; 1992 nz = ai[i+1] - ai[i] - 1; 1993 sum = t[i]; 1994 while (nz--) sum -= x[*vj++]*(*v++); 1995 PetscLogFlops(2*nz-1); 1996 x[i] = (1-omega)*x[i] + omega*sum/d; 1997 } 1998 } 1999 its--; 2000 } 2001 2002 while (its--) { 2003 /* 2004 forward sweep: 2005 for i=0,...,m-1: 2006 sum[i] = (b[i] - U(i,:)x )/d[i]; 2007 x[i] = (1-omega)x[i] + omega*sum[i]; 2008 b = b - x[i]*U^T(i,:); 2009 2010 */ 2011 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2012 for (i=0; i<m; i++) 2013 t[i] = b[i]; 2014 2015 for (i=0; i<m; i++){ 2016 d = *(aa + ai[i]); /* diag[i] */ 2017 v = aa + ai[i] + 1; v1=v; 2018 vj = aj + ai[i] + 1; vj1=vj; 2019 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2020 sum = t[i]; 2021 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2022 x[i] = (1-omega)*x[i] + omega*sum/d; 2023 while (nz--) t[*vj++] -= x[i]*(*v++); 2024 PetscLogFlops(4*nz-2); 2025 } 2026 } 2027 2028 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2029 /* 2030 backward sweep: 2031 b = b - x[i]*U^T(i,:), i=0,...,n-2 2032 for i=m-1,...,0: 2033 sum[i] = (b[i] - U(i,:)x )/d[i]; 2034 x[i] = (1-omega)x[i] + omega*sum[i]; 2035 */ 2036 for (i=0; i<m; i++) 2037 t[i] = b[i]; 2038 2039 for (i=0; i<m-1; i++){ /* update rhs */ 2040 v = aa + ai[i] + 1; 2041 vj = aj + ai[i] + 1; 2042 nz = ai[i+1] - ai[i] - 1; 2043 while (nz--) t[*vj++] -= x[i]*(*v++); 2044 PetscLogFlops(2*nz-1); 2045 } 2046 for (i=m-1; i>=0; i--){ 2047 d = *(aa + ai[i]); 2048 v = aa + ai[i] + 1; 2049 vj = aj + ai[i] + 1; 2050 nz = ai[i+1] - ai[i] - 1; 2051 sum = t[i]; 2052 while (nz--) sum -= x[*vj++]*(*v++); 2053 PetscLogFlops(2*nz-1); 2054 x[i] = (1-omega)*x[i] + omega*sum/d; 2055 } 2056 } 2057 } 2058 2059 ierr = PetscFree(t); CHKERRQ(ierr); 2060 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2061 if (bb != xx) { 2062 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2063 } 2064 2065 PetscFunctionReturn(0); 2066 } 2067 2068 2069 2070 2071 2072 2073