1 2 /* 3 Defines the basic matrix operations for the SBAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 7 #include "src/inline/spops.h" 8 #include "src/mat/impls/sbaij/seq/sbaij.h" 9 10 #define CHUNKSIZE 10 11 12 /* 13 Checks for missing diagonals 14 */ 15 #undef __FUNCT__ 16 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 17 int MatMissingDiagonal_SeqSBAIJ(Mat A) 18 { 19 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 20 int *diag,*jj = a->j,i,ierr; 21 22 PetscFunctionBegin; 23 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 24 diag = a->diag; 25 for (i=0; i<a->mbs; i++) { 26 if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i); 27 } 28 PetscFunctionReturn(0); 29 } 30 31 #undef __FUNCT__ 32 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 33 int MatMarkDiagonal_SeqSBAIJ(Mat A) 34 { 35 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 36 int i,mbs = a->mbs,ierr; 37 38 PetscFunctionBegin; 39 if (a->diag) PetscFunctionReturn(0); 40 41 ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr); 42 PetscLogObjectMemory(A,(mbs+1)*sizeof(int)); 43 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 44 PetscFunctionReturn(0); 45 } 46 47 #undef __FUNCT__ 48 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 49 static int MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 50 { 51 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 52 int n = a->mbs,i; 53 54 PetscFunctionBegin; 55 *nn = n; 56 if (!ia) PetscFunctionReturn(0); 57 58 if (oshift == 1) { 59 /* temporarily add 1 to i and j indices */ 60 int nz = a->i[n]; 61 for (i=0; i<nz; i++) a->j[i]++; 62 for (i=0; i<n+1; i++) a->i[i]++; 63 *ia = a->i; *ja = a->j; 64 } else { 65 *ia = a->i; *ja = a->j; 66 } 67 PetscFunctionReturn(0); 68 } 69 70 #undef __FUNCT__ 71 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 72 static int MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 73 { 74 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 75 int i,n = a->mbs; 76 77 PetscFunctionBegin; 78 if (!ia) PetscFunctionReturn(0); 79 80 if (oshift == 1) { 81 int nz = a->i[n]-1; 82 for (i=0; i<nz; i++) a->j[i]--; 83 for (i=0; i<n+1; i++) a->i[i]--; 84 } 85 PetscFunctionReturn(0); 86 } 87 88 #undef __FUNCT__ 89 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ" 90 int MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs) 91 { 92 Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data; 93 94 PetscFunctionBegin; 95 *bs = sbaij->bs; 96 PetscFunctionReturn(0); 97 } 98 99 #undef __FUNCT__ 100 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 101 int MatDestroy_SeqSBAIJ(Mat A) 102 { 103 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 104 int ierr; 105 106 PetscFunctionBegin; 107 #if defined(PETSC_USE_LOG) 108 PetscLogObjectState((PetscObject)A,"Rows=%d, NZ=%d",A->m,a->nz); 109 #endif 110 ierr = PetscFree(a->a);CHKERRQ(ierr); 111 if (!a->singlemalloc) { 112 ierr = PetscFree(a->i);CHKERRQ(ierr); 113 ierr = PetscFree(a->j);CHKERRQ(ierr); 114 } 115 if (a->row) { 116 ierr = ISDestroy(a->row);CHKERRQ(ierr); 117 } 118 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 119 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 120 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 121 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 122 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 123 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 124 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 125 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 126 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 127 128 if (a->inew){ 129 ierr = PetscFree(a->inew);CHKERRQ(ierr); 130 a->inew = 0; 131 } 132 ierr = PetscFree(a);CHKERRQ(ierr); 133 PetscFunctionReturn(0); 134 } 135 136 #undef __FUNCT__ 137 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 138 int MatSetOption_SeqSBAIJ(Mat A,MatOption op) 139 { 140 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 141 142 PetscFunctionBegin; 143 switch (op) { 144 case MAT_ROW_ORIENTED: 145 a->roworiented = PETSC_TRUE; 146 break; 147 case MAT_COLUMN_ORIENTED: 148 a->roworiented = PETSC_FALSE; 149 break; 150 case MAT_COLUMNS_SORTED: 151 a->sorted = PETSC_TRUE; 152 break; 153 case MAT_COLUMNS_UNSORTED: 154 a->sorted = PETSC_FALSE; 155 break; 156 case MAT_KEEP_ZEROED_ROWS: 157 a->keepzeroedrows = PETSC_TRUE; 158 break; 159 case MAT_NO_NEW_NONZERO_LOCATIONS: 160 a->nonew = 1; 161 break; 162 case MAT_NEW_NONZERO_LOCATION_ERR: 163 a->nonew = -1; 164 break; 165 case MAT_NEW_NONZERO_ALLOCATION_ERR: 166 a->nonew = -2; 167 break; 168 case MAT_YES_NEW_NONZERO_LOCATIONS: 169 a->nonew = 0; 170 break; 171 case MAT_ROWS_SORTED: 172 case MAT_ROWS_UNSORTED: 173 case MAT_YES_NEW_DIAGONALS: 174 case MAT_IGNORE_OFF_PROC_ENTRIES: 175 case MAT_USE_HASH_TABLE: 176 PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n"); 177 break; 178 case MAT_NO_NEW_DIAGONALS: 179 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 180 case MAT_NOT_SYMMETRIC: 181 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 182 case MAT_HERMITIAN: 183 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 184 case MAT_SYMMETRIC: 185 case MAT_STRUCTURALLY_SYMMETRIC: 186 case MAT_NOT_HERMITIAN: 187 case MAT_SYMMETRY_ETERNAL: 188 case MAT_NOT_SYMMETRY_ETERNAL: 189 break; 190 default: 191 SETERRQ(PETSC_ERR_SUP,"unknown option"); 192 } 193 PetscFunctionReturn(0); 194 } 195 196 #undef __FUNCT__ 197 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 198 int MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v) 199 { 200 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 201 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2,ierr; 202 MatScalar *aa,*aa_i; 203 PetscScalar *v_i; 204 205 PetscFunctionBegin; 206 bs = a->bs; 207 ai = a->i; 208 aj = a->j; 209 aa = a->a; 210 bs2 = a->bs2; 211 212 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %d out of range", row); 213 214 bn = row/bs; /* Block number */ 215 bp = row % bs; /* Block position */ 216 M = ai[bn+1] - ai[bn]; 217 *ncols = bs*M; 218 219 if (v) { 220 *v = 0; 221 if (*ncols) { 222 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 223 for (i=0; i<M; i++) { /* for each block in the block row */ 224 v_i = *v + i*bs; 225 aa_i = aa + bs2*(ai[bn] + i); 226 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 227 } 228 } 229 } 230 231 if (cols) { 232 *cols = 0; 233 if (*ncols) { 234 ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr); 235 for (i=0; i<M; i++) { /* for each block in the block row */ 236 cols_i = *cols + i*bs; 237 itmp = bs*aj[ai[bn] + i]; 238 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 239 } 240 } 241 } 242 243 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 244 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 245 #ifdef column_search 246 v_i = *v + M*bs; 247 cols_i = *cols + M*bs; 248 for (i=0; i<bn; i++){ /* for each block row */ 249 M = ai[i+1] - ai[i]; 250 for (j=0; j<M; j++){ 251 itmp = aj[ai[i] + j]; /* block column value */ 252 if (itmp == bn){ 253 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 254 for (k=0; k<bs; k++) { 255 *cols_i++ = i*bs+k; 256 *v_i++ = aa_i[k]; 257 } 258 *ncols += bs; 259 break; 260 } 261 } 262 } 263 #endif 264 265 PetscFunctionReturn(0); 266 } 267 268 #undef __FUNCT__ 269 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 270 int MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 271 { 272 int ierr; 273 274 PetscFunctionBegin; 275 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 276 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 277 PetscFunctionReturn(0); 278 } 279 280 #undef __FUNCT__ 281 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 282 int MatTranspose_SeqSBAIJ(Mat A,Mat *B) 283 { 284 int ierr; 285 PetscFunctionBegin; 286 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 287 PetscFunctionReturn(0); 288 } 289 290 #undef __FUNCT__ 291 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 292 static int MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 293 { 294 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 295 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 296 char *name; 297 PetscViewerFormat format; 298 299 PetscFunctionBegin; 300 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 301 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 302 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 303 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 304 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 305 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 306 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 307 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 308 for (i=0; i<a->mbs; i++) { 309 for (j=0; j<bs; j++) { 310 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 311 for (k=a->i[i]; k<a->i[i+1]; k++) { 312 for (l=0; l<bs; l++) { 313 #if defined(PETSC_USE_COMPLEX) 314 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 315 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 316 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 317 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 318 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 319 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 320 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 321 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 322 } 323 #else 324 if (a->a[bs2*k + l*bs + j] != 0.0) { 325 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 326 } 327 #endif 328 } 329 } 330 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 331 } 332 } 333 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 334 } else { 335 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 336 for (i=0; i<a->mbs; i++) { 337 for (j=0; j<bs; j++) { 338 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 339 for (k=a->i[i]; k<a->i[i+1]; k++) { 340 for (l=0; l<bs; l++) { 341 #if defined(PETSC_USE_COMPLEX) 342 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 343 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 344 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 345 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 346 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 347 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 348 } else { 349 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 350 } 351 #else 352 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 353 #endif 354 } 355 } 356 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 357 } 358 } 359 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 360 } 361 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 362 PetscFunctionReturn(0); 363 } 364 365 #undef __FUNCT__ 366 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 367 static int MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 368 { 369 Mat A = (Mat) Aa; 370 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 371 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank; 372 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 373 MatScalar *aa; 374 MPI_Comm comm; 375 PetscViewer viewer; 376 377 PetscFunctionBegin; 378 /* 379 This is nasty. If this is called from an originally parallel matrix 380 then all processes call this,but only the first has the matrix so the 381 rest should return immediately. 382 */ 383 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 384 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 385 if (rank) PetscFunctionReturn(0); 386 387 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 388 389 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 390 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 391 392 /* loop over matrix elements drawing boxes */ 393 color = PETSC_DRAW_BLUE; 394 for (i=0,row=0; i<mbs; i++,row+=bs) { 395 for (j=a->i[i]; j<a->i[i+1]; j++) { 396 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 397 x_l = a->j[j]*bs; x_r = x_l + 1.0; 398 aa = a->a + j*bs2; 399 for (k=0; k<bs; k++) { 400 for (l=0; l<bs; l++) { 401 if (PetscRealPart(*aa++) >= 0.) continue; 402 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 403 } 404 } 405 } 406 } 407 color = PETSC_DRAW_CYAN; 408 for (i=0,row=0; i<mbs; i++,row+=bs) { 409 for (j=a->i[i]; j<a->i[i+1]; j++) { 410 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 411 x_l = a->j[j]*bs; x_r = x_l + 1.0; 412 aa = a->a + j*bs2; 413 for (k=0; k<bs; k++) { 414 for (l=0; l<bs; l++) { 415 if (PetscRealPart(*aa++) != 0.) continue; 416 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 417 } 418 } 419 } 420 } 421 422 color = PETSC_DRAW_RED; 423 for (i=0,row=0; i<mbs; i++,row+=bs) { 424 for (j=a->i[i]; j<a->i[i+1]; j++) { 425 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 426 x_l = a->j[j]*bs; x_r = x_l + 1.0; 427 aa = a->a + j*bs2; 428 for (k=0; k<bs; k++) { 429 for (l=0; l<bs; l++) { 430 if (PetscRealPart(*aa++) <= 0.) continue; 431 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 432 } 433 } 434 } 435 } 436 PetscFunctionReturn(0); 437 } 438 439 #undef __FUNCT__ 440 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 441 static int MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 442 { 443 int ierr; 444 PetscReal xl,yl,xr,yr,w,h; 445 PetscDraw draw; 446 PetscTruth isnull; 447 448 PetscFunctionBegin; 449 450 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 451 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 452 453 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 454 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 455 xr += w; yr += h; xl = -w; yl = -h; 456 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 457 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 458 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 459 PetscFunctionReturn(0); 460 } 461 462 #undef __FUNCT__ 463 #define __FUNCT__ "MatView_SeqSBAIJ" 464 int MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 465 { 466 int ierr; 467 PetscTruth iascii,isdraw; 468 469 PetscFunctionBegin; 470 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 471 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 472 if (iascii){ 473 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 474 } else if (isdraw) { 475 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 476 } else { 477 Mat B; 478 ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr); 479 ierr = MatView(B,viewer);CHKERRQ(ierr); 480 ierr = MatDestroy(B);CHKERRQ(ierr); 481 } 482 PetscFunctionReturn(0); 483 } 484 485 486 #undef __FUNCT__ 487 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 488 int MatGetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[]) 489 { 490 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 491 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 492 int *ai = a->i,*ailen = a->ilen; 493 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 494 MatScalar *ap,*aa = a->a,zero = 0.0; 495 496 PetscFunctionBegin; 497 for (k=0; k<m; k++) { /* loop over rows */ 498 row = im[k]; brow = row/bs; 499 if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",row); 500 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1); 501 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 502 nrow = ailen[brow]; 503 for (l=0; l<n; l++) { /* loop over columns */ 504 if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",in[l]); 505 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1); 506 col = in[l] ; 507 bcol = col/bs; 508 cidx = col%bs; 509 ridx = row%bs; 510 high = nrow; 511 low = 0; /* assume unsorted */ 512 while (high-low > 5) { 513 t = (low+high)/2; 514 if (rp[t] > bcol) high = t; 515 else low = t; 516 } 517 for (i=low; i<high; i++) { 518 if (rp[i] > bcol) break; 519 if (rp[i] == bcol) { 520 *v++ = ap[bs2*i+bs*cidx+ridx]; 521 goto finished; 522 } 523 } 524 *v++ = zero; 525 finished:; 526 } 527 } 528 PetscFunctionReturn(0); 529 } 530 531 532 #undef __FUNCT__ 533 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 534 int MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 535 { 536 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 537 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 538 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 539 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 540 PetscTruth roworiented=a->roworiented; 541 const MatScalar *value = v; 542 MatScalar *ap,*aa = a->a,*bap; 543 544 PetscFunctionBegin; 545 if (roworiented) { 546 stepval = (n-1)*bs; 547 } else { 548 stepval = (m-1)*bs; 549 } 550 for (k=0; k<m; k++) { /* loop over added rows */ 551 row = im[k]; 552 if (row < 0) continue; 553 #if defined(PETSC_USE_BOPT_g) 554 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1); 555 #endif 556 rp = aj + ai[row]; 557 ap = aa + bs2*ai[row]; 558 rmax = imax[row]; 559 nrow = ailen[row]; 560 low = 0; 561 for (l=0; l<n; l++) { /* loop over added columns */ 562 if (in[l] < 0) continue; 563 col = in[l]; 564 #if defined(PETSC_USE_BOPT_g) 565 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",col,a->nbs-1); 566 #endif 567 if (col < row) continue; /* ignore lower triangular block */ 568 if (roworiented) { 569 value = v + k*(stepval+bs)*bs + l*bs; 570 } else { 571 value = v + l*(stepval+bs)*bs + k*bs; 572 } 573 if (!sorted) low = 0; high = nrow; 574 while (high-low > 7) { 575 t = (low+high)/2; 576 if (rp[t] > col) high = t; 577 else low = t; 578 } 579 for (i=low; i<high; i++) { 580 if (rp[i] > col) break; 581 if (rp[i] == col) { 582 bap = ap + bs2*i; 583 if (roworiented) { 584 if (is == ADD_VALUES) { 585 for (ii=0; ii<bs; ii++,value+=stepval) { 586 for (jj=ii; jj<bs2; jj+=bs) { 587 bap[jj] += *value++; 588 } 589 } 590 } else { 591 for (ii=0; ii<bs; ii++,value+=stepval) { 592 for (jj=ii; jj<bs2; jj+=bs) { 593 bap[jj] = *value++; 594 } 595 } 596 } 597 } else { 598 if (is == ADD_VALUES) { 599 for (ii=0; ii<bs; ii++,value+=stepval) { 600 for (jj=0; jj<bs; jj++) { 601 *bap++ += *value++; 602 } 603 } 604 } else { 605 for (ii=0; ii<bs; ii++,value+=stepval) { 606 for (jj=0; jj<bs; jj++) { 607 *bap++ = *value++; 608 } 609 } 610 } 611 } 612 goto noinsert2; 613 } 614 } 615 if (nonew == 1) goto noinsert2; 616 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 617 if (nrow >= rmax) { 618 /* there is no extra room in row, therefore enlarge */ 619 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 620 MatScalar *new_a; 621 622 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 623 624 /* malloc new storage space */ 625 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 626 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 627 new_j = (int*)(new_a + bs2*new_nz); 628 new_i = new_j + new_nz; 629 630 /* copy over old data into new slots */ 631 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 632 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 633 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 634 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 635 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 636 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 637 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 638 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 639 /* free up old matrix storage */ 640 ierr = PetscFree(a->a);CHKERRQ(ierr); 641 if (!a->singlemalloc) { 642 ierr = PetscFree(a->i);CHKERRQ(ierr); 643 ierr = PetscFree(a->j);CHKERRQ(ierr); 644 } 645 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 646 a->singlemalloc = PETSC_TRUE; 647 648 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 649 rmax = imax[row] = imax[row] + CHUNKSIZE; 650 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 651 a->maxnz += bs2*CHUNKSIZE; 652 a->reallocs++; 653 a->nz++; 654 } 655 N = nrow++ - 1; 656 /* shift up all the later entries in this row */ 657 for (ii=N; ii>=i; ii--) { 658 rp[ii+1] = rp[ii]; 659 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 660 } 661 if (N >= i) { 662 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 663 } 664 rp[i] = col; 665 bap = ap + bs2*i; 666 if (roworiented) { 667 for (ii=0; ii<bs; ii++,value+=stepval) { 668 for (jj=ii; jj<bs2; jj+=bs) { 669 bap[jj] = *value++; 670 } 671 } 672 } else { 673 for (ii=0; ii<bs; ii++,value+=stepval) { 674 for (jj=0; jj<bs; jj++) { 675 *bap++ = *value++; 676 } 677 } 678 } 679 noinsert2:; 680 low = i; 681 } 682 ailen[row] = nrow; 683 } 684 PetscFunctionReturn(0); 685 } 686 687 #undef __FUNCT__ 688 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 689 int MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 690 { 691 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 692 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 693 int m = A->m,*ip,N,*ailen = a->ilen; 694 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 695 MatScalar *aa = a->a,*ap; 696 697 PetscFunctionBegin; 698 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 699 700 if (m) rmax = ailen[0]; 701 for (i=1; i<mbs; i++) { 702 /* move each row back by the amount of empty slots (fshift) before it*/ 703 fshift += imax[i-1] - ailen[i-1]; 704 rmax = PetscMax(rmax,ailen[i]); 705 if (fshift) { 706 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 707 N = ailen[i]; 708 for (j=0; j<N; j++) { 709 ip[j-fshift] = ip[j]; 710 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 711 } 712 } 713 ai[i] = ai[i-1] + ailen[i-1]; 714 } 715 if (mbs) { 716 fshift += imax[mbs-1] - ailen[mbs-1]; 717 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 718 } 719 /* reset ilen and imax for each row */ 720 for (i=0; i<mbs; i++) { 721 ailen[i] = imax[i] = ai[i+1] - ai[i]; 722 } 723 a->nz = ai[mbs]; 724 725 /* diagonals may have moved, reset it */ 726 if (a->diag) { 727 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr); 728 } 729 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 730 m,A->m,a->bs,fshift*bs2,a->nz*bs2); 731 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n", 732 a->reallocs); 733 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 734 a->reallocs = 0; 735 A->info.nz_unneeded = (PetscReal)fshift*bs2; 736 737 PetscFunctionReturn(0); 738 } 739 740 /* 741 This function returns an array of flags which indicate the locations of contiguous 742 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 743 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 744 Assume: sizes should be long enough to hold all the values. 745 */ 746 #undef __FUNCT__ 747 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 748 int MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 749 { 750 int i,j,k,row; 751 PetscTruth flg; 752 753 PetscFunctionBegin; 754 for (i=0,j=0; i<n; j++) { 755 row = idx[i]; 756 if (row%bs!=0) { /* Not the begining of a block */ 757 sizes[j] = 1; 758 i++; 759 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 760 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 761 i++; 762 } else { /* Begining of the block, so check if the complete block exists */ 763 flg = PETSC_TRUE; 764 for (k=1; k<bs; k++) { 765 if (row+k != idx[i+k]) { /* break in the block */ 766 flg = PETSC_FALSE; 767 break; 768 } 769 } 770 if (flg == PETSC_TRUE) { /* No break in the bs */ 771 sizes[j] = bs; 772 i+= bs; 773 } else { 774 sizes[j] = 1; 775 i++; 776 } 777 } 778 } 779 *bs_max = j; 780 PetscFunctionReturn(0); 781 } 782 783 #undef __FUNCT__ 784 #define __FUNCT__ "MatZeroRows_SeqSBAIJ" 785 int MatZeroRows_SeqSBAIJ(Mat A,IS is,const PetscScalar *diag) 786 { 787 PetscFunctionBegin; 788 SETERRQ(PETSC_ERR_SUP,"No support for this function yet"); 789 } 790 791 /* Only add/insert a(i,j) with i<=j (blocks). 792 Any a(i,j) with i>j input by user is ingored. 793 */ 794 795 #undef __FUNCT__ 796 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 797 int MatSetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 798 { 799 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 800 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 801 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 802 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 803 int ridx,cidx,bs2=a->bs2,ierr; 804 MatScalar *ap,value,*aa=a->a,*bap; 805 806 PetscFunctionBegin; 807 808 for (k=0; k<m; k++) { /* loop over added rows */ 809 row = im[k]; /* row number */ 810 brow = row/bs; /* block row number */ 811 if (row < 0) continue; 812 #if defined(PETSC_USE_BOPT_g) 813 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1); 814 #endif 815 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 816 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 817 rmax = imax[brow]; /* maximum space allocated for this row */ 818 nrow = ailen[brow]; /* actual length of this row */ 819 low = 0; 820 821 for (l=0; l<n; l++) { /* loop over added columns */ 822 if (in[l] < 0) continue; 823 #if defined(PETSC_USE_BOPT_g) 824 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m-1); 825 #endif 826 col = in[l]; 827 bcol = col/bs; /* block col number */ 828 829 if (brow <= bcol){ 830 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 831 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 832 /* element value a(k,l) */ 833 if (roworiented) { 834 value = v[l + k*n]; 835 } else { 836 value = v[k + l*m]; 837 } 838 839 /* move pointer bap to a(k,l) quickly and add/insert value */ 840 if (!sorted) low = 0; high = nrow; 841 while (high-low > 7) { 842 t = (low+high)/2; 843 if (rp[t] > bcol) high = t; 844 else low = t; 845 } 846 for (i=low; i<high; i++) { 847 /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */ 848 if (rp[i] > bcol) break; 849 if (rp[i] == bcol) { 850 bap = ap + bs2*i + bs*cidx + ridx; 851 if (is == ADD_VALUES) *bap += value; 852 else *bap = value; 853 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 854 if (brow == bcol && ridx < cidx){ 855 bap = ap + bs2*i + bs*ridx + cidx; 856 if (is == ADD_VALUES) *bap += value; 857 else *bap = value; 858 } 859 goto noinsert1; 860 } 861 } 862 863 if (nonew == 1) goto noinsert1; 864 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 865 if (nrow >= rmax) { 866 /* there is no extra room in row, therefore enlarge */ 867 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 868 MatScalar *new_a; 869 870 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 871 872 /* Malloc new storage space */ 873 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 874 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 875 new_j = (int*)(new_a + bs2*new_nz); 876 new_i = new_j + new_nz; 877 878 /* copy over old data into new slots */ 879 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 880 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 881 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 882 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 883 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 884 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 885 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 886 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 887 /* free up old matrix storage */ 888 ierr = PetscFree(a->a);CHKERRQ(ierr); 889 if (!a->singlemalloc) { 890 ierr = PetscFree(a->i);CHKERRQ(ierr); 891 ierr = PetscFree(a->j);CHKERRQ(ierr); 892 } 893 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 894 a->singlemalloc = PETSC_TRUE; 895 896 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 897 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 898 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 899 a->maxnz += bs2*CHUNKSIZE; 900 a->reallocs++; 901 a->nz++; 902 } 903 904 N = nrow++ - 1; 905 /* shift up all the later entries in this row */ 906 for (ii=N; ii>=i; ii--) { 907 rp[ii+1] = rp[ii]; 908 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 909 } 910 if (N>=i) { 911 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 912 } 913 rp[i] = bcol; 914 ap[bs2*i + bs*cidx + ridx] = value; 915 noinsert1:; 916 low = i; 917 /* } */ 918 } 919 } /* end of if .. if.. */ 920 } /* end of loop over added columns */ 921 ailen[brow] = nrow; 922 } /* end of loop over added rows */ 923 924 PetscFunctionReturn(0); 925 } 926 927 extern int MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*); 928 extern int MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*); 929 extern int MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS[],int); 930 extern int MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 931 extern int MatGetSubMatrices_SeqSBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat*[]); 932 extern int MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec); 933 extern int MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec); 934 extern int MatScale_SeqSBAIJ(const PetscScalar*,Mat); 935 extern int MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 936 extern int MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 937 extern int MatGetDiagonal_SeqSBAIJ(Mat,Vec); 938 extern int MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 939 extern int MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 940 extern int MatZeroEntries_SeqSBAIJ(Mat); 941 extern int MatGetRowMax_SeqSBAIJ(Mat,Vec); 942 943 extern int MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 944 extern int MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 945 extern int MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 946 extern int MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 947 extern int MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 948 extern int MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 949 extern int MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 950 extern int MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 951 extern int MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec); 952 extern int MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec); 953 extern int MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec); 954 extern int MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec); 955 extern int MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec); 956 extern int MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec); 957 extern int MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec); 958 959 extern int MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs); 960 961 extern int MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 962 extern int MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 963 extern int MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 964 extern int MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 965 extern int MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 966 extern int MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 967 extern int MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 968 extern int MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 969 970 extern int MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*); 971 extern int MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*); 972 extern int MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*); 973 extern int MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*); 974 extern int MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*); 975 extern int MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*); 976 extern int MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*); 977 extern int MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*); 978 extern int MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*); 979 980 extern int MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 981 extern int MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 982 extern int MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 983 extern int MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 984 extern int MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 985 extern int MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 986 extern int MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 987 extern int MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 988 989 extern int MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 990 extern int MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 991 extern int MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 992 extern int MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 993 extern int MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 994 extern int MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 995 extern int MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 996 extern int MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 997 998 #ifdef HAVE_MatICCFactor 999 /* modefied from MatILUFactor_SeqSBAIJ, needs further work! */ 1000 #undef __FUNCT__ 1001 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 1002 int MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 1003 { 1004 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 1005 Mat outA; 1006 int ierr; 1007 PetscTruth row_identity,col_identity; 1008 1009 PetscFunctionBegin; 1010 /* 1011 if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1012 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1013 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1014 if (!row_identity || !col_identity) { 1015 SETERRQ(1,"Row and column permutations must be identity for in-place ICC"); 1016 } 1017 */ 1018 1019 outA = inA; 1020 inA->factor = FACTOR_CHOLESKY; 1021 1022 if (!a->diag) { 1023 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 1024 } 1025 /* 1026 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1027 for ILU(0) factorization with natural ordering 1028 */ 1029 switch (a->bs) { 1030 case 1: 1031 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1032 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1033 inA->ops->solves = MatSolves_SeqSBAIJ_1; 1034 PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1035 case 2: 1036 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1037 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1038 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1039 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1040 break; 1041 case 3: 1042 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1043 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1044 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1045 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1046 break; 1047 case 4: 1048 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1049 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1050 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1051 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1052 break; 1053 case 5: 1054 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1055 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1056 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1057 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1058 break; 1059 case 6: 1060 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1061 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1062 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1063 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1064 break; 1065 case 7: 1066 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1067 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1068 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1069 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1070 break; 1071 default: 1072 a->row = row; 1073 a->icol = col; 1074 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1075 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1076 1077 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1078 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1079 PetscLogObjectParent(inA,a->icol); 1080 1081 if (!a->solve_work) { 1082 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1083 PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar)); 1084 } 1085 } 1086 1087 ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr); 1088 1089 PetscFunctionReturn(0); 1090 } 1091 #endif 1092 1093 #undef __FUNCT__ 1094 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1095 int MatPrintHelp_SeqSBAIJ(Mat A) 1096 { 1097 static PetscTruth called = PETSC_FALSE; 1098 MPI_Comm comm = A->comm; 1099 int ierr; 1100 1101 PetscFunctionBegin; 1102 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1103 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1104 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1105 PetscFunctionReturn(0); 1106 } 1107 1108 EXTERN_C_BEGIN 1109 #undef __FUNCT__ 1110 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1111 int MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices) 1112 { 1113 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1114 int i,nz,n; 1115 1116 PetscFunctionBegin; 1117 nz = baij->maxnz; 1118 n = mat->n; 1119 for (i=0; i<nz; i++) { 1120 baij->j[i] = indices[i]; 1121 } 1122 baij->nz = nz; 1123 for (i=0; i<n; i++) { 1124 baij->ilen[i] = baij->imax[i]; 1125 } 1126 1127 PetscFunctionReturn(0); 1128 } 1129 EXTERN_C_END 1130 1131 #undef __FUNCT__ 1132 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1133 /*@ 1134 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1135 in the matrix. 1136 1137 Input Parameters: 1138 + mat - the SeqSBAIJ matrix 1139 - indices - the column indices 1140 1141 Level: advanced 1142 1143 Notes: 1144 This can be called if you have precomputed the nonzero structure of the 1145 matrix and want to provide it to the matrix object to improve the performance 1146 of the MatSetValues() operation. 1147 1148 You MUST have set the correct numbers of nonzeros per row in the call to 1149 MatCreateSeqSBAIJ(). 1150 1151 MUST be called before any calls to MatSetValues() 1152 1153 .seealso: MatCreateSeqSBAIJ 1154 @*/ 1155 int MatSeqSBAIJSetColumnIndices(Mat mat,int *indices) 1156 { 1157 int ierr,(*f)(Mat,int *); 1158 1159 PetscFunctionBegin; 1160 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1161 PetscValidPointer(indices,2); 1162 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1163 if (f) { 1164 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1165 } else { 1166 SETERRQ(1,"Wrong type of matrix to set column indices"); 1167 } 1168 PetscFunctionReturn(0); 1169 } 1170 1171 #undef __FUNCT__ 1172 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1173 int MatSetUpPreallocation_SeqSBAIJ(Mat A) 1174 { 1175 int ierr; 1176 1177 PetscFunctionBegin; 1178 ierr = MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1179 PetscFunctionReturn(0); 1180 } 1181 1182 #undef __FUNCT__ 1183 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1184 int MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1185 { 1186 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1187 PetscFunctionBegin; 1188 *array = a->a; 1189 PetscFunctionReturn(0); 1190 } 1191 1192 #undef __FUNCT__ 1193 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1194 int MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1195 { 1196 PetscFunctionBegin; 1197 PetscFunctionReturn(0); 1198 } 1199 1200 #include "petscblaslapack.h" 1201 #undef __FUNCT__ 1202 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1203 int MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1204 { 1205 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1206 int ierr,one=1,i,bs=y->bs,bs2,j; 1207 1208 PetscFunctionBegin; 1209 if (str == SAME_NONZERO_PATTERN) { 1210 BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one); 1211 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1212 if (y->xtoy && y->XtoY != X) { 1213 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1214 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1215 } 1216 if (!y->xtoy) { /* get xtoy */ 1217 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1218 y->XtoY = X; 1219 } 1220 bs2 = bs*bs; 1221 for (i=0; i<x->nz; i++) { 1222 j = 0; 1223 while (j < bs2){ 1224 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1225 j++; 1226 } 1227 } 1228 PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)); 1229 } else { 1230 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1231 } 1232 PetscFunctionReturn(0); 1233 } 1234 1235 #undef __FUNCT__ 1236 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1237 int MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1238 { 1239 PetscFunctionBegin; 1240 *flg = PETSC_TRUE; 1241 PetscFunctionReturn(0); 1242 } 1243 1244 #undef __FUNCT__ 1245 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1246 int MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1247 { 1248 PetscFunctionBegin; 1249 *flg = PETSC_TRUE; 1250 PetscFunctionReturn(0); 1251 } 1252 1253 #undef __FUNCT__ 1254 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1255 int MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg) 1256 { 1257 PetscFunctionBegin; 1258 *flg = PETSC_FALSE; 1259 PetscFunctionReturn(0); 1260 } 1261 1262 /* -------------------------------------------------------------------*/ 1263 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1264 MatGetRow_SeqSBAIJ, 1265 MatRestoreRow_SeqSBAIJ, 1266 MatMult_SeqSBAIJ_N, 1267 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1268 MatMultTranspose_SeqSBAIJ, 1269 MatMultTransposeAdd_SeqSBAIJ, 1270 MatSolve_SeqSBAIJ_N, 1271 0, 1272 0, 1273 /*10*/ 0, 1274 0, 1275 MatCholeskyFactor_SeqSBAIJ, 1276 MatRelax_SeqSBAIJ, 1277 MatTranspose_SeqSBAIJ, 1278 /*15*/ MatGetInfo_SeqSBAIJ, 1279 MatEqual_SeqSBAIJ, 1280 MatGetDiagonal_SeqSBAIJ, 1281 MatDiagonalScale_SeqSBAIJ, 1282 MatNorm_SeqSBAIJ, 1283 /*20*/ 0, 1284 MatAssemblyEnd_SeqSBAIJ, 1285 0, 1286 MatSetOption_SeqSBAIJ, 1287 MatZeroEntries_SeqSBAIJ, 1288 /*25*/ MatZeroRows_SeqSBAIJ, 1289 0, 1290 0, 1291 MatCholeskyFactorSymbolic_SeqSBAIJ, 1292 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1293 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1294 0, 1295 MatICCFactorSymbolic_SeqSBAIJ, 1296 MatGetArray_SeqSBAIJ, 1297 MatRestoreArray_SeqSBAIJ, 1298 /*35*/ MatDuplicate_SeqSBAIJ, 1299 0, 1300 0, 1301 0, 1302 0, 1303 /*40*/ MatAXPY_SeqSBAIJ, 1304 MatGetSubMatrices_SeqSBAIJ, 1305 MatIncreaseOverlap_SeqSBAIJ, 1306 MatGetValues_SeqSBAIJ, 1307 0, 1308 /*45*/ MatPrintHelp_SeqSBAIJ, 1309 MatScale_SeqSBAIJ, 1310 0, 1311 0, 1312 0, 1313 /*50*/ MatGetBlockSize_SeqSBAIJ, 1314 MatGetRowIJ_SeqSBAIJ, 1315 MatRestoreRowIJ_SeqSBAIJ, 1316 0, 1317 0, 1318 /*55*/ 0, 1319 0, 1320 0, 1321 0, 1322 MatSetValuesBlocked_SeqSBAIJ, 1323 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1324 0, 1325 0, 1326 MatGetPetscMaps_Petsc, 1327 0, 1328 /*65*/ 0, 1329 0, 1330 0, 1331 0, 1332 0, 1333 /*70*/ MatGetRowMax_SeqSBAIJ, 1334 0, 1335 0, 1336 0, 1337 0, 1338 /*75*/ 0, 1339 0, 1340 0, 1341 0, 1342 0, 1343 /*80*/ 0, 1344 0, 1345 0, 1346 #if !defined(PETSC_USE_COMPLEX) 1347 MatGetInertia_SeqSBAIJ, 1348 #else 1349 0, 1350 #endif 1351 /*85*/ MatLoad_SeqSBAIJ, 1352 MatIsSymmetric_SeqSBAIJ, 1353 MatIsStructurallySymmetric_SeqSBAIJ, 1354 MatIsHermitian_SeqSBAIJ 1355 }; 1356 1357 EXTERN_C_BEGIN 1358 #undef __FUNCT__ 1359 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1360 int MatStoreValues_SeqSBAIJ(Mat mat) 1361 { 1362 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1363 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1364 int ierr; 1365 1366 PetscFunctionBegin; 1367 if (aij->nonew != 1) { 1368 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1369 } 1370 1371 /* allocate space for values if not already there */ 1372 if (!aij->saved_values) { 1373 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1374 } 1375 1376 /* copy values over */ 1377 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1378 PetscFunctionReturn(0); 1379 } 1380 EXTERN_C_END 1381 1382 EXTERN_C_BEGIN 1383 #undef __FUNCT__ 1384 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1385 int MatRetrieveValues_SeqSBAIJ(Mat mat) 1386 { 1387 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1388 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1389 1390 PetscFunctionBegin; 1391 if (aij->nonew != 1) { 1392 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1393 } 1394 if (!aij->saved_values) { 1395 SETERRQ(1,"Must call MatStoreValues(A);first"); 1396 } 1397 1398 /* copy values over */ 1399 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1400 PetscFunctionReturn(0); 1401 } 1402 EXTERN_C_END 1403 1404 EXTERN_C_BEGIN 1405 #undef __FUNCT__ 1406 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1407 int MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,int bs,int nz,int *nnz) 1408 { 1409 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1410 int i,len,ierr,mbs,bs2; 1411 PetscTruth flg; 1412 1413 PetscFunctionBegin; 1414 B->preallocated = PETSC_TRUE; 1415 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1416 mbs = B->m/bs; 1417 bs2 = bs*bs; 1418 1419 if (mbs*bs != B->m) { 1420 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1421 } 1422 1423 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1424 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1425 if (nnz) { 1426 for (i=0; i<mbs; i++) { 1427 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1428 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs); 1429 } 1430 } 1431 1432 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1433 if (!flg) { 1434 switch (bs) { 1435 case 1: 1436 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1437 B->ops->solve = MatSolve_SeqSBAIJ_1; 1438 B->ops->solves = MatSolves_SeqSBAIJ_1; 1439 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_1; 1440 B->ops->mult = MatMult_SeqSBAIJ_1; 1441 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1442 break; 1443 case 2: 1444 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1445 B->ops->solve = MatSolve_SeqSBAIJ_2; 1446 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_2; 1447 B->ops->mult = MatMult_SeqSBAIJ_2; 1448 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1449 break; 1450 case 3: 1451 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1452 B->ops->solve = MatSolve_SeqSBAIJ_3; 1453 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_3; 1454 B->ops->mult = MatMult_SeqSBAIJ_3; 1455 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1456 break; 1457 case 4: 1458 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1459 B->ops->solve = MatSolve_SeqSBAIJ_4; 1460 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_4; 1461 B->ops->mult = MatMult_SeqSBAIJ_4; 1462 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1463 break; 1464 case 5: 1465 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1466 B->ops->solve = MatSolve_SeqSBAIJ_5; 1467 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_5; 1468 B->ops->mult = MatMult_SeqSBAIJ_5; 1469 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1470 break; 1471 case 6: 1472 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1473 B->ops->solve = MatSolve_SeqSBAIJ_6; 1474 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_6; 1475 B->ops->mult = MatMult_SeqSBAIJ_6; 1476 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1477 break; 1478 case 7: 1479 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1480 B->ops->solve = MatSolve_SeqSBAIJ_7; 1481 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_7; 1482 B->ops->mult = MatMult_SeqSBAIJ_7; 1483 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1484 break; 1485 } 1486 } 1487 1488 b->mbs = mbs; 1489 b->nbs = mbs; 1490 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1491 if (!nnz) { 1492 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1493 else if (nz <= 0) nz = 1; 1494 for (i=0; i<mbs; i++) { 1495 b->imax[i] = nz; 1496 } 1497 nz = nz*mbs; /* total nz */ 1498 } else { 1499 nz = 0; 1500 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1501 } 1502 /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1503 1504 /* allocate the matrix space */ 1505 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1506 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1507 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1508 b->j = (int*)(b->a + nz*bs2); 1509 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 1510 b->i = b->j + nz; 1511 b->singlemalloc = PETSC_TRUE; 1512 1513 /* pointer to beginning of each row */ 1514 b->i[0] = 0; 1515 for (i=1; i<mbs+1; i++) { 1516 b->i[i] = b->i[i-1] + b->imax[i-1]; 1517 } 1518 1519 /* b->ilen will count nonzeros in each block row so far. */ 1520 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1521 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1522 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1523 1524 b->bs = bs; 1525 b->bs2 = bs2; 1526 b->nz = 0; 1527 b->maxnz = nz*bs2; 1528 1529 b->inew = 0; 1530 b->jnew = 0; 1531 b->anew = 0; 1532 b->a2anew = 0; 1533 b->permute = PETSC_FALSE; 1534 PetscFunctionReturn(0); 1535 } 1536 EXTERN_C_END 1537 1538 EXTERN_C_BEGIN 1539 EXTERN int MatConvert_SeqSBAIJ_SeqAIJ(Mat,const MatType,Mat*); 1540 EXTERN int MatConvert_SeqSBAIJ_SeqBAIJ(Mat,const MatType,Mat*); 1541 EXTERN_C_END 1542 1543 /*MC 1544 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1545 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1546 1547 Options Database Keys: 1548 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1549 1550 Level: beginner 1551 1552 .seealso: MatCreateSeqSBAIJ 1553 M*/ 1554 1555 EXTERN_C_BEGIN 1556 #undef __FUNCT__ 1557 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1558 int MatCreate_SeqSBAIJ(Mat B) 1559 { 1560 Mat_SeqSBAIJ *b; 1561 int ierr,size; 1562 1563 PetscFunctionBegin; 1564 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1565 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1566 B->m = B->M = PetscMax(B->m,B->M); 1567 B->n = B->N = PetscMax(B->n,B->N); 1568 1569 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1570 B->data = (void*)b; 1571 ierr = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1572 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1573 B->ops->destroy = MatDestroy_SeqSBAIJ; 1574 B->ops->view = MatView_SeqSBAIJ; 1575 B->factor = 0; 1576 B->lupivotthreshold = 1.0; 1577 B->mapping = 0; 1578 b->row = 0; 1579 b->icol = 0; 1580 b->reallocs = 0; 1581 b->saved_values = 0; 1582 1583 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1584 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1585 1586 b->sorted = PETSC_FALSE; 1587 b->roworiented = PETSC_TRUE; 1588 b->nonew = 0; 1589 b->diag = 0; 1590 b->solve_work = 0; 1591 b->mult_work = 0; 1592 B->spptr = 0; 1593 b->keepzeroedrows = PETSC_FALSE; 1594 b->xtoy = 0; 1595 b->XtoY = 0; 1596 1597 b->inew = 0; 1598 b->jnew = 0; 1599 b->anew = 0; 1600 b->a2anew = 0; 1601 b->permute = PETSC_FALSE; 1602 1603 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1604 "MatStoreValues_SeqSBAIJ", 1605 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1606 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1607 "MatRetrieveValues_SeqSBAIJ", 1608 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1609 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1610 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1611 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1612 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C", 1613 "MatConvert_SeqSBAIJ_SeqAIJ", 1614 MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr); 1615 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C", 1616 "MatConvert_SeqSBAIJ_SeqBAIJ", 1617 MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr); 1618 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1619 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1620 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1621 1622 B->symmetric = PETSC_TRUE; 1623 B->structurally_symmetric = PETSC_TRUE; 1624 B->symmetric_set = PETSC_TRUE; 1625 B->structurally_symmetric_set = PETSC_TRUE; 1626 PetscFunctionReturn(0); 1627 } 1628 EXTERN_C_END 1629 1630 #undef __FUNCT__ 1631 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1632 /*@C 1633 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1634 compressed row) format. For good matrix assembly performance the 1635 user should preallocate the matrix storage by setting the parameter nz 1636 (or the array nnz). By setting these parameters accurately, performance 1637 during matrix assembly can be increased by more than a factor of 50. 1638 1639 Collective on Mat 1640 1641 Input Parameters: 1642 + A - the symmetric matrix 1643 . bs - size of block 1644 . nz - number of block nonzeros per block row (same for all rows) 1645 - nnz - array containing the number of block nonzeros in the upper triangular plus 1646 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1647 1648 Options Database Keys: 1649 . -mat_no_unroll - uses code that does not unroll the loops in the 1650 block calculations (much slower) 1651 . -mat_block_size - size of the blocks to use 1652 1653 Level: intermediate 1654 1655 Notes: 1656 Specify the preallocated storage with either nz or nnz (not both). 1657 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1658 allocation. For additional details, see the users manual chapter on 1659 matrices. 1660 1661 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1662 @*/ 1663 int MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) { 1664 int ierr,(*f)(Mat,int,int,const int[]); 1665 1666 PetscFunctionBegin; 1667 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1668 if (f) { 1669 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1670 } 1671 PetscFunctionReturn(0); 1672 } 1673 1674 #undef __FUNCT__ 1675 #define __FUNCT__ "MatCreateSeqSBAIJ" 1676 /*@C 1677 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1678 compressed row) format. For good matrix assembly performance the 1679 user should preallocate the matrix storage by setting the parameter nz 1680 (or the array nnz). By setting these parameters accurately, performance 1681 during matrix assembly can be increased by more than a factor of 50. 1682 1683 Collective on MPI_Comm 1684 1685 Input Parameters: 1686 + comm - MPI communicator, set to PETSC_COMM_SELF 1687 . bs - size of block 1688 . m - number of rows, or number of columns 1689 . nz - number of block nonzeros per block row (same for all rows) 1690 - nnz - array containing the number of block nonzeros in the upper triangular plus 1691 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1692 1693 Output Parameter: 1694 . A - the symmetric matrix 1695 1696 Options Database Keys: 1697 . -mat_no_unroll - uses code that does not unroll the loops in the 1698 block calculations (much slower) 1699 . -mat_block_size - size of the blocks to use 1700 1701 Level: intermediate 1702 1703 Notes: 1704 1705 Specify the preallocated storage with either nz or nnz (not both). 1706 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1707 allocation. For additional details, see the users manual chapter on 1708 matrices. 1709 1710 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1711 @*/ 1712 int MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A) 1713 { 1714 int ierr; 1715 1716 PetscFunctionBegin; 1717 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1718 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1719 ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1720 PetscFunctionReturn(0); 1721 } 1722 1723 #undef __FUNCT__ 1724 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1725 int MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1726 { 1727 Mat C; 1728 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1729 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1730 1731 PetscFunctionBegin; 1732 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1733 1734 *B = 0; 1735 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1736 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1737 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1738 c = (Mat_SeqSBAIJ*)C->data; 1739 1740 C->preallocated = PETSC_TRUE; 1741 C->factor = A->factor; 1742 c->row = 0; 1743 c->icol = 0; 1744 c->saved_values = 0; 1745 c->keepzeroedrows = a->keepzeroedrows; 1746 C->assembled = PETSC_TRUE; 1747 1748 C->M = A->M; 1749 C->N = A->N; 1750 c->bs = a->bs; 1751 c->bs2 = a->bs2; 1752 c->mbs = a->mbs; 1753 c->nbs = a->nbs; 1754 1755 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1756 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1757 for (i=0; i<mbs; i++) { 1758 c->imax[i] = a->imax[i]; 1759 c->ilen[i] = a->ilen[i]; 1760 } 1761 1762 /* allocate the matrix space */ 1763 c->singlemalloc = PETSC_TRUE; 1764 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1765 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1766 c->j = (int*)(c->a + nz*bs2); 1767 c->i = c->j + nz; 1768 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1769 if (mbs > 0) { 1770 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1771 if (cpvalues == MAT_COPY_VALUES) { 1772 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1773 } else { 1774 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1775 } 1776 } 1777 1778 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1779 c->sorted = a->sorted; 1780 c->roworiented = a->roworiented; 1781 c->nonew = a->nonew; 1782 1783 if (a->diag) { 1784 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1785 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1786 for (i=0; i<mbs; i++) { 1787 c->diag[i] = a->diag[i]; 1788 } 1789 } else c->diag = 0; 1790 c->nz = a->nz; 1791 c->maxnz = a->maxnz; 1792 c->solve_work = 0; 1793 c->mult_work = 0; 1794 *B = C; 1795 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1796 PetscFunctionReturn(0); 1797 } 1798 1799 #undef __FUNCT__ 1800 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1801 int MatLoad_SeqSBAIJ(PetscViewer viewer,const MatType type,Mat *A) 1802 { 1803 Mat_SeqSBAIJ *a; 1804 Mat B; 1805 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1806 int *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1807 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1808 int *masked,nmask,tmp,bs2,ishift; 1809 PetscScalar *aa; 1810 MPI_Comm comm = ((PetscObject)viewer)->comm; 1811 1812 PetscFunctionBegin; 1813 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1814 bs2 = bs*bs; 1815 1816 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1817 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1818 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1819 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1820 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1821 M = header[1]; N = header[2]; nz = header[3]; 1822 1823 if (header[3] < 0) { 1824 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1825 } 1826 1827 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1828 1829 /* 1830 This code adds extra rows to make sure the number of rows is 1831 divisible by the blocksize 1832 */ 1833 mbs = M/bs; 1834 extra_rows = bs - M + bs*(mbs); 1835 if (extra_rows == bs) extra_rows = 0; 1836 else mbs++; 1837 if (extra_rows) { 1838 PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"); 1839 } 1840 1841 /* read in row lengths */ 1842 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1843 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1844 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1845 1846 /* read in column indices */ 1847 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1848 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1849 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1850 1851 /* loop over row lengths determining block row lengths */ 1852 ierr = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr); 1853 ierr = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1854 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1855 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1856 masked = mask + mbs; 1857 rowcount = 0; nzcount = 0; 1858 for (i=0; i<mbs; i++) { 1859 nmask = 0; 1860 for (j=0; j<bs; j++) { 1861 kmax = rowlengths[rowcount]; 1862 for (k=0; k<kmax; k++) { 1863 tmp = jj[nzcount++]/bs; /* block col. index */ 1864 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1865 } 1866 rowcount++; 1867 } 1868 s_browlengths[i] += nmask; 1869 1870 /* zero out the mask elements we set */ 1871 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1872 } 1873 1874 /* create our matrix */ 1875 ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr); 1876 ierr = MatSetType(B,type);CHKERRQ(ierr); 1877 ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr); 1878 a = (Mat_SeqSBAIJ*)B->data; 1879 1880 /* set matrix "i" values */ 1881 a->i[0] = 0; 1882 for (i=1; i<= mbs; i++) { 1883 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1884 a->ilen[i-1] = s_browlengths[i-1]; 1885 } 1886 a->nz = a->i[mbs]; 1887 1888 /* read in nonzero values */ 1889 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1890 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1891 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1892 1893 /* set "a" and "j" values into matrix */ 1894 nzcount = 0; jcount = 0; 1895 for (i=0; i<mbs; i++) { 1896 nzcountb = nzcount; 1897 nmask = 0; 1898 for (j=0; j<bs; j++) { 1899 kmax = rowlengths[i*bs+j]; 1900 for (k=0; k<kmax; k++) { 1901 tmp = jj[nzcount++]/bs; /* block col. index */ 1902 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1903 } 1904 } 1905 /* sort the masked values */ 1906 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1907 1908 /* set "j" values into matrix */ 1909 maskcount = 1; 1910 for (j=0; j<nmask; j++) { 1911 a->j[jcount++] = masked[j]; 1912 mask[masked[j]] = maskcount++; 1913 } 1914 1915 /* set "a" values into matrix */ 1916 ishift = bs2*a->i[i]; 1917 for (j=0; j<bs; j++) { 1918 kmax = rowlengths[i*bs+j]; 1919 for (k=0; k<kmax; k++) { 1920 tmp = jj[nzcountb]/bs ; /* block col. index */ 1921 if (tmp >= i){ 1922 block = mask[tmp] - 1; 1923 point = jj[nzcountb] - bs*tmp; 1924 idx = ishift + bs2*block + j + bs*point; 1925 a->a[idx] = aa[nzcountb]; 1926 } 1927 nzcountb++; 1928 } 1929 } 1930 /* zero out the mask elements we set */ 1931 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1932 } 1933 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1934 1935 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1936 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1937 ierr = PetscFree(aa);CHKERRQ(ierr); 1938 ierr = PetscFree(jj);CHKERRQ(ierr); 1939 ierr = PetscFree(mask);CHKERRQ(ierr); 1940 1941 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1942 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1943 ierr = MatView_Private(B);CHKERRQ(ierr); 1944 *A = B; 1945 PetscFunctionReturn(0); 1946 } 1947 1948 #undef __FUNCT__ 1949 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1950 int MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1951 { 1952 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1953 MatScalar *aa=a->a,*v,*v1; 1954 PetscScalar *x,*b,*t,sum,d; 1955 int m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j,ierr; 1956 int nz,nz1,*vj,*vj1,i; 1957 1958 PetscFunctionBegin; 1959 its = its*lits; 1960 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1961 1962 if (bs > 1) 1963 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1964 1965 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1966 if (xx != bb) { 1967 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1968 } else { 1969 b = x; 1970 } 1971 1972 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1973 1974 if (flag & SOR_ZERO_INITIAL_GUESS) { 1975 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1976 for (i=0; i<m; i++) 1977 t[i] = b[i]; 1978 1979 for (i=0; i<m; i++){ 1980 d = *(aa + ai[i]); /* diag[i] */ 1981 v = aa + ai[i] + 1; 1982 vj = aj + ai[i] + 1; 1983 nz = ai[i+1] - ai[i] - 1; 1984 x[i] = omega*t[i]/d; 1985 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 1986 PetscLogFlops(2*nz-1); 1987 } 1988 } 1989 1990 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1991 for (i=0; i<m; i++) 1992 t[i] = b[i]; 1993 1994 for (i=0; i<m-1; i++){ /* update rhs */ 1995 v = aa + ai[i] + 1; 1996 vj = aj + ai[i] + 1; 1997 nz = ai[i+1] - ai[i] - 1; 1998 while (nz--) t[*vj++] -= x[i]*(*v++); 1999 PetscLogFlops(2*nz-1); 2000 } 2001 for (i=m-1; i>=0; i--){ 2002 d = *(aa + ai[i]); 2003 v = aa + ai[i] + 1; 2004 vj = aj + ai[i] + 1; 2005 nz = ai[i+1] - ai[i] - 1; 2006 sum = t[i]; 2007 while (nz--) sum -= x[*vj++]*(*v++); 2008 PetscLogFlops(2*nz-1); 2009 x[i] = (1-omega)*x[i] + omega*sum/d; 2010 } 2011 } 2012 its--; 2013 } 2014 2015 while (its--) { 2016 /* 2017 forward sweep: 2018 for i=0,...,m-1: 2019 sum[i] = (b[i] - U(i,:)x )/d[i]; 2020 x[i] = (1-omega)x[i] + omega*sum[i]; 2021 b = b - x[i]*U^T(i,:); 2022 2023 */ 2024 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2025 for (i=0; i<m; i++) 2026 t[i] = b[i]; 2027 2028 for (i=0; i<m; i++){ 2029 d = *(aa + ai[i]); /* diag[i] */ 2030 v = aa + ai[i] + 1; v1=v; 2031 vj = aj + ai[i] + 1; vj1=vj; 2032 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2033 sum = t[i]; 2034 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2035 x[i] = (1-omega)*x[i] + omega*sum/d; 2036 while (nz--) t[*vj++] -= x[i]*(*v++); 2037 PetscLogFlops(4*nz-2); 2038 } 2039 } 2040 2041 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2042 /* 2043 backward sweep: 2044 b = b - x[i]*U^T(i,:), i=0,...,n-2 2045 for i=m-1,...,0: 2046 sum[i] = (b[i] - U(i,:)x )/d[i]; 2047 x[i] = (1-omega)x[i] + omega*sum[i]; 2048 */ 2049 for (i=0; i<m; i++) 2050 t[i] = b[i]; 2051 2052 for (i=0; i<m-1; i++){ /* update rhs */ 2053 v = aa + ai[i] + 1; 2054 vj = aj + ai[i] + 1; 2055 nz = ai[i+1] - ai[i] - 1; 2056 while (nz--) t[*vj++] -= x[i]*(*v++); 2057 PetscLogFlops(2*nz-1); 2058 } 2059 for (i=m-1; i>=0; i--){ 2060 d = *(aa + ai[i]); 2061 v = aa + ai[i] + 1; 2062 vj = aj + ai[i] + 1; 2063 nz = ai[i+1] - ai[i] - 1; 2064 sum = t[i]; 2065 while (nz--) sum -= x[*vj++]*(*v++); 2066 PetscLogFlops(2*nz-1); 2067 x[i] = (1-omega)*x[i] + omega*sum/d; 2068 } 2069 } 2070 } 2071 2072 ierr = PetscFree(t);CHKERRQ(ierr); 2073 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2074 if (bb != xx) { 2075 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2076 } 2077 2078 PetscFunctionReturn(0); 2079 } 2080 2081 2082 2083 2084 2085 2086