1 2 /* 3 Defines the basic matrix operations for the SBAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 7 #include "src/inline/spops.h" 8 #include "src/mat/impls/sbaij/seq/sbaij.h" 9 10 #define CHUNKSIZE 10 11 12 /* 13 Checks for missing diagonals 14 */ 15 #undef __FUNCT__ 16 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 17 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A) 18 { 19 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 20 PetscErrorCode ierr; 21 int *diag,*jj = a->j,i; 22 23 PetscFunctionBegin; 24 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 25 diag = a->diag; 26 for (i=0; i<a->mbs; i++) { 27 if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i); 28 } 29 PetscFunctionReturn(0); 30 } 31 32 #undef __FUNCT__ 33 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 34 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A) 35 { 36 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 37 PetscErrorCode ierr; 38 int i,mbs = a->mbs; 39 40 PetscFunctionBegin; 41 if (a->diag) PetscFunctionReturn(0); 42 43 ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr); 44 PetscLogObjectMemory(A,(mbs+1)*sizeof(int)); 45 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 46 PetscFunctionReturn(0); 47 } 48 49 #undef __FUNCT__ 50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 51 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 52 { 53 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 54 int n = a->mbs,i; 55 56 PetscFunctionBegin; 57 *nn = n; 58 if (!ia) PetscFunctionReturn(0); 59 60 if (oshift == 1) { 61 /* temporarily add 1 to i and j indices */ 62 int nz = a->i[n]; 63 for (i=0; i<nz; i++) a->j[i]++; 64 for (i=0; i<n+1; i++) a->i[i]++; 65 *ia = a->i; *ja = a->j; 66 } else { 67 *ia = a->i; *ja = a->j; 68 } 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 74 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 75 { 76 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 77 int i,n = a->mbs; 78 79 PetscFunctionBegin; 80 if (!ia) PetscFunctionReturn(0); 81 82 if (oshift == 1) { 83 int nz = a->i[n]-1; 84 for (i=0; i<nz; i++) a->j[i]--; 85 for (i=0; i<n+1; i++) a->i[i]--; 86 } 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ" 92 PetscErrorCode MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs) 93 { 94 Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data; 95 96 PetscFunctionBegin; 97 *bs = sbaij->bs; 98 PetscFunctionReturn(0); 99 } 100 101 #undef __FUNCT__ 102 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 103 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A) 104 { 105 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 106 PetscErrorCode ierr; 107 108 PetscFunctionBegin; 109 #if defined(PETSC_USE_LOG) 110 PetscLogObjectState((PetscObject)A,"Rows=%d, NZ=%d",A->m,a->nz); 111 #endif 112 ierr = PetscFree(a->a);CHKERRQ(ierr); 113 if (!a->singlemalloc) { 114 ierr = PetscFree(a->i);CHKERRQ(ierr); 115 ierr = PetscFree(a->j);CHKERRQ(ierr); 116 } 117 if (a->row) { 118 ierr = ISDestroy(a->row);CHKERRQ(ierr); 119 } 120 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 121 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 122 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 123 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 124 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 125 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 126 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 127 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 128 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 129 130 if (a->inew){ 131 ierr = PetscFree(a->inew);CHKERRQ(ierr); 132 a->inew = 0; 133 } 134 ierr = PetscFree(a);CHKERRQ(ierr); 135 PetscFunctionReturn(0); 136 } 137 138 #undef __FUNCT__ 139 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 140 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op) 141 { 142 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 143 144 PetscFunctionBegin; 145 switch (op) { 146 case MAT_ROW_ORIENTED: 147 a->roworiented = PETSC_TRUE; 148 break; 149 case MAT_COLUMN_ORIENTED: 150 a->roworiented = PETSC_FALSE; 151 break; 152 case MAT_COLUMNS_SORTED: 153 a->sorted = PETSC_TRUE; 154 break; 155 case MAT_COLUMNS_UNSORTED: 156 a->sorted = PETSC_FALSE; 157 break; 158 case MAT_KEEP_ZEROED_ROWS: 159 a->keepzeroedrows = PETSC_TRUE; 160 break; 161 case MAT_NO_NEW_NONZERO_LOCATIONS: 162 a->nonew = 1; 163 break; 164 case MAT_NEW_NONZERO_LOCATION_ERR: 165 a->nonew = -1; 166 break; 167 case MAT_NEW_NONZERO_ALLOCATION_ERR: 168 a->nonew = -2; 169 break; 170 case MAT_YES_NEW_NONZERO_LOCATIONS: 171 a->nonew = 0; 172 break; 173 case MAT_ROWS_SORTED: 174 case MAT_ROWS_UNSORTED: 175 case MAT_YES_NEW_DIAGONALS: 176 case MAT_IGNORE_OFF_PROC_ENTRIES: 177 case MAT_USE_HASH_TABLE: 178 PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n"); 179 break; 180 case MAT_NO_NEW_DIAGONALS: 181 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 182 case MAT_NOT_SYMMETRIC: 183 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 184 case MAT_HERMITIAN: 185 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 186 case MAT_SYMMETRIC: 187 case MAT_STRUCTURALLY_SYMMETRIC: 188 case MAT_NOT_HERMITIAN: 189 case MAT_SYMMETRY_ETERNAL: 190 case MAT_NOT_SYMMETRY_ETERNAL: 191 break; 192 default: 193 SETERRQ(PETSC_ERR_SUP,"unknown option"); 194 } 195 PetscFunctionReturn(0); 196 } 197 198 #undef __FUNCT__ 199 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 200 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v) 201 { 202 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 203 PetscErrorCode ierr; 204 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2; 205 MatScalar *aa,*aa_i; 206 PetscScalar *v_i; 207 208 PetscFunctionBegin; 209 bs = a->bs; 210 ai = a->i; 211 aj = a->j; 212 aa = a->a; 213 bs2 = a->bs2; 214 215 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %d out of range", row); 216 217 bn = row/bs; /* Block number */ 218 bp = row % bs; /* Block position */ 219 M = ai[bn+1] - ai[bn]; 220 *ncols = bs*M; 221 222 if (v) { 223 *v = 0; 224 if (*ncols) { 225 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 226 for (i=0; i<M; i++) { /* for each block in the block row */ 227 v_i = *v + i*bs; 228 aa_i = aa + bs2*(ai[bn] + i); 229 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 230 } 231 } 232 } 233 234 if (cols) { 235 *cols = 0; 236 if (*ncols) { 237 ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr); 238 for (i=0; i<M; i++) { /* for each block in the block row */ 239 cols_i = *cols + i*bs; 240 itmp = bs*aj[ai[bn] + i]; 241 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 242 } 243 } 244 } 245 246 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 247 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 248 #ifdef column_search 249 v_i = *v + M*bs; 250 cols_i = *cols + M*bs; 251 for (i=0; i<bn; i++){ /* for each block row */ 252 M = ai[i+1] - ai[i]; 253 for (j=0; j<M; j++){ 254 itmp = aj[ai[i] + j]; /* block column value */ 255 if (itmp == bn){ 256 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 257 for (k=0; k<bs; k++) { 258 *cols_i++ = i*bs+k; 259 *v_i++ = aa_i[k]; 260 } 261 *ncols += bs; 262 break; 263 } 264 } 265 } 266 #endif 267 268 PetscFunctionReturn(0); 269 } 270 271 #undef __FUNCT__ 272 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 273 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 274 { 275 PetscErrorCode ierr; 276 277 PetscFunctionBegin; 278 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 279 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 280 PetscFunctionReturn(0); 281 } 282 283 #undef __FUNCT__ 284 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 285 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B) 286 { 287 PetscErrorCode ierr; 288 PetscFunctionBegin; 289 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 290 PetscFunctionReturn(0); 291 } 292 293 #undef __FUNCT__ 294 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 295 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 296 { 297 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 298 PetscErrorCode ierr; 299 int i,j,bs = a->bs,k,l,bs2=a->bs2; 300 char *name; 301 PetscViewerFormat format; 302 303 PetscFunctionBegin; 304 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 305 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 306 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 307 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 308 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 309 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 310 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 311 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 312 for (i=0; i<a->mbs; i++) { 313 for (j=0; j<bs; j++) { 314 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 315 for (k=a->i[i]; k<a->i[i+1]; k++) { 316 for (l=0; l<bs; l++) { 317 #if defined(PETSC_USE_COMPLEX) 318 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 319 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 320 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 321 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 322 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 323 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 324 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 325 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 326 } 327 #else 328 if (a->a[bs2*k + l*bs + j] != 0.0) { 329 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 330 } 331 #endif 332 } 333 } 334 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 335 } 336 } 337 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 338 } else { 339 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 340 for (i=0; i<a->mbs; i++) { 341 for (j=0; j<bs; j++) { 342 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 343 for (k=a->i[i]; k<a->i[i+1]; k++) { 344 for (l=0; l<bs; l++) { 345 #if defined(PETSC_USE_COMPLEX) 346 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 347 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 348 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 349 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 350 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 351 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 352 } else { 353 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 354 } 355 #else 356 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 357 #endif 358 } 359 } 360 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 361 } 362 } 363 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 364 } 365 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 366 PetscFunctionReturn(0); 367 } 368 369 #undef __FUNCT__ 370 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 371 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 372 { 373 Mat A = (Mat) Aa; 374 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 375 PetscErrorCode ierr; 376 int row,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank; 377 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 378 MatScalar *aa; 379 MPI_Comm comm; 380 PetscViewer viewer; 381 382 PetscFunctionBegin; 383 /* 384 This is nasty. If this is called from an originally parallel matrix 385 then all processes call this,but only the first has the matrix so the 386 rest should return immediately. 387 */ 388 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 389 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 390 if (rank) PetscFunctionReturn(0); 391 392 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 393 394 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 395 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 396 397 /* loop over matrix elements drawing boxes */ 398 color = PETSC_DRAW_BLUE; 399 for (i=0,row=0; i<mbs; i++,row+=bs) { 400 for (j=a->i[i]; j<a->i[i+1]; j++) { 401 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 402 x_l = a->j[j]*bs; x_r = x_l + 1.0; 403 aa = a->a + j*bs2; 404 for (k=0; k<bs; k++) { 405 for (l=0; l<bs; l++) { 406 if (PetscRealPart(*aa++) >= 0.) continue; 407 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 408 } 409 } 410 } 411 } 412 color = PETSC_DRAW_CYAN; 413 for (i=0,row=0; i<mbs; i++,row+=bs) { 414 for (j=a->i[i]; j<a->i[i+1]; j++) { 415 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 416 x_l = a->j[j]*bs; x_r = x_l + 1.0; 417 aa = a->a + j*bs2; 418 for (k=0; k<bs; k++) { 419 for (l=0; l<bs; l++) { 420 if (PetscRealPart(*aa++) != 0.) continue; 421 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 422 } 423 } 424 } 425 } 426 427 color = PETSC_DRAW_RED; 428 for (i=0,row=0; i<mbs; i++,row+=bs) { 429 for (j=a->i[i]; j<a->i[i+1]; j++) { 430 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 431 x_l = a->j[j]*bs; x_r = x_l + 1.0; 432 aa = a->a + j*bs2; 433 for (k=0; k<bs; k++) { 434 for (l=0; l<bs; l++) { 435 if (PetscRealPart(*aa++) <= 0.) continue; 436 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 437 } 438 } 439 } 440 } 441 PetscFunctionReturn(0); 442 } 443 444 #undef __FUNCT__ 445 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 446 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 447 { 448 PetscErrorCode ierr; 449 PetscReal xl,yl,xr,yr,w,h; 450 PetscDraw draw; 451 PetscTruth isnull; 452 453 PetscFunctionBegin; 454 455 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 456 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 457 458 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 459 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 460 xr += w; yr += h; xl = -w; yl = -h; 461 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 462 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 463 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 464 PetscFunctionReturn(0); 465 } 466 467 #undef __FUNCT__ 468 #define __FUNCT__ "MatView_SeqSBAIJ" 469 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 470 { 471 PetscErrorCode ierr; 472 PetscTruth iascii,isdraw; 473 474 PetscFunctionBegin; 475 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 476 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 477 if (iascii){ 478 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 479 } else if (isdraw) { 480 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 481 } else { 482 Mat B; 483 ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr); 484 ierr = MatView(B,viewer);CHKERRQ(ierr); 485 ierr = MatDestroy(B);CHKERRQ(ierr); 486 } 487 PetscFunctionReturn(0); 488 } 489 490 491 #undef __FUNCT__ 492 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 493 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[]) 494 { 495 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 496 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 497 int *ai = a->i,*ailen = a->ilen; 498 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 499 MatScalar *ap,*aa = a->a,zero = 0.0; 500 501 PetscFunctionBegin; 502 for (k=0; k<m; k++) { /* loop over rows */ 503 row = im[k]; brow = row/bs; 504 if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",row); 505 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1); 506 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 507 nrow = ailen[brow]; 508 for (l=0; l<n; l++) { /* loop over columns */ 509 if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",in[l]); 510 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1); 511 col = in[l] ; 512 bcol = col/bs; 513 cidx = col%bs; 514 ridx = row%bs; 515 high = nrow; 516 low = 0; /* assume unsorted */ 517 while (high-low > 5) { 518 t = (low+high)/2; 519 if (rp[t] > bcol) high = t; 520 else low = t; 521 } 522 for (i=low; i<high; i++) { 523 if (rp[i] > bcol) break; 524 if (rp[i] == bcol) { 525 *v++ = ap[bs2*i+bs*cidx+ridx]; 526 goto finished; 527 } 528 } 529 *v++ = zero; 530 finished:; 531 } 532 } 533 PetscFunctionReturn(0); 534 } 535 536 537 #undef __FUNCT__ 538 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 539 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 540 { 541 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 542 PetscErrorCode ierr; 543 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 544 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 545 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval; 546 PetscTruth roworiented=a->roworiented; 547 const MatScalar *value = v; 548 MatScalar *ap,*aa = a->a,*bap; 549 550 PetscFunctionBegin; 551 if (roworiented) { 552 stepval = (n-1)*bs; 553 } else { 554 stepval = (m-1)*bs; 555 } 556 for (k=0; k<m; k++) { /* loop over added rows */ 557 row = im[k]; 558 if (row < 0) continue; 559 #if defined(PETSC_USE_BOPT_g) 560 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1); 561 #endif 562 rp = aj + ai[row]; 563 ap = aa + bs2*ai[row]; 564 rmax = imax[row]; 565 nrow = ailen[row]; 566 low = 0; 567 for (l=0; l<n; l++) { /* loop over added columns */ 568 if (in[l] < 0) continue; 569 col = in[l]; 570 #if defined(PETSC_USE_BOPT_g) 571 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",col,a->nbs-1); 572 #endif 573 if (col < row) continue; /* ignore lower triangular block */ 574 if (roworiented) { 575 value = v + k*(stepval+bs)*bs + l*bs; 576 } else { 577 value = v + l*(stepval+bs)*bs + k*bs; 578 } 579 if (!sorted) low = 0; high = nrow; 580 while (high-low > 7) { 581 t = (low+high)/2; 582 if (rp[t] > col) high = t; 583 else low = t; 584 } 585 for (i=low; i<high; i++) { 586 if (rp[i] > col) break; 587 if (rp[i] == col) { 588 bap = ap + bs2*i; 589 if (roworiented) { 590 if (is == ADD_VALUES) { 591 for (ii=0; ii<bs; ii++,value+=stepval) { 592 for (jj=ii; jj<bs2; jj+=bs) { 593 bap[jj] += *value++; 594 } 595 } 596 } else { 597 for (ii=0; ii<bs; ii++,value+=stepval) { 598 for (jj=ii; jj<bs2; jj+=bs) { 599 bap[jj] = *value++; 600 } 601 } 602 } 603 } else { 604 if (is == ADD_VALUES) { 605 for (ii=0; ii<bs; ii++,value+=stepval) { 606 for (jj=0; jj<bs; jj++) { 607 *bap++ += *value++; 608 } 609 } 610 } else { 611 for (ii=0; ii<bs; ii++,value+=stepval) { 612 for (jj=0; jj<bs; jj++) { 613 *bap++ = *value++; 614 } 615 } 616 } 617 } 618 goto noinsert2; 619 } 620 } 621 if (nonew == 1) goto noinsert2; 622 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 623 if (nrow >= rmax) { 624 /* there is no extra room in row, therefore enlarge */ 625 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 626 MatScalar *new_a; 627 628 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 629 630 /* malloc new storage space */ 631 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 632 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 633 new_j = (int*)(new_a + bs2*new_nz); 634 new_i = new_j + new_nz; 635 636 /* copy over old data into new slots */ 637 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 638 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 639 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 640 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 641 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 642 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 643 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 644 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 645 /* free up old matrix storage */ 646 ierr = PetscFree(a->a);CHKERRQ(ierr); 647 if (!a->singlemalloc) { 648 ierr = PetscFree(a->i);CHKERRQ(ierr); 649 ierr = PetscFree(a->j);CHKERRQ(ierr); 650 } 651 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 652 a->singlemalloc = PETSC_TRUE; 653 654 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 655 rmax = imax[row] = imax[row] + CHUNKSIZE; 656 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 657 a->maxnz += bs2*CHUNKSIZE; 658 a->reallocs++; 659 a->nz++; 660 } 661 N = nrow++ - 1; 662 /* shift up all the later entries in this row */ 663 for (ii=N; ii>=i; ii--) { 664 rp[ii+1] = rp[ii]; 665 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 666 } 667 if (N >= i) { 668 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 669 } 670 rp[i] = col; 671 bap = ap + bs2*i; 672 if (roworiented) { 673 for (ii=0; ii<bs; ii++,value+=stepval) { 674 for (jj=ii; jj<bs2; jj+=bs) { 675 bap[jj] = *value++; 676 } 677 } 678 } else { 679 for (ii=0; ii<bs; ii++,value+=stepval) { 680 for (jj=0; jj<bs; jj++) { 681 *bap++ = *value++; 682 } 683 } 684 } 685 noinsert2:; 686 low = i; 687 } 688 ailen[row] = nrow; 689 } 690 PetscFunctionReturn(0); 691 } 692 693 #undef __FUNCT__ 694 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 695 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 696 { 697 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 698 PetscErrorCode ierr; 699 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 700 int m = A->m,*ip,N,*ailen = a->ilen; 701 int mbs = a->mbs,bs2 = a->bs2,rmax = 0; 702 MatScalar *aa = a->a,*ap; 703 704 PetscFunctionBegin; 705 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 706 707 if (m) rmax = ailen[0]; 708 for (i=1; i<mbs; i++) { 709 /* move each row back by the amount of empty slots (fshift) before it*/ 710 fshift += imax[i-1] - ailen[i-1]; 711 rmax = PetscMax(rmax,ailen[i]); 712 if (fshift) { 713 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 714 N = ailen[i]; 715 for (j=0; j<N; j++) { 716 ip[j-fshift] = ip[j]; 717 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 718 } 719 } 720 ai[i] = ai[i-1] + ailen[i-1]; 721 } 722 if (mbs) { 723 fshift += imax[mbs-1] - ailen[mbs-1]; 724 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 725 } 726 /* reset ilen and imax for each row */ 727 for (i=0; i<mbs; i++) { 728 ailen[i] = imax[i] = ai[i+1] - ai[i]; 729 } 730 a->nz = ai[mbs]; 731 732 /* diagonals may have moved, reset it */ 733 if (a->diag) { 734 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr); 735 } 736 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 737 m,A->m,a->bs,fshift*bs2,a->nz*bs2); 738 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n", 739 a->reallocs); 740 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 741 a->reallocs = 0; 742 A->info.nz_unneeded = (PetscReal)fshift*bs2; 743 744 PetscFunctionReturn(0); 745 } 746 747 /* 748 This function returns an array of flags which indicate the locations of contiguous 749 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 750 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 751 Assume: sizes should be long enough to hold all the values. 752 */ 753 #undef __FUNCT__ 754 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 755 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 756 { 757 int i,j,k,row; 758 PetscTruth flg; 759 760 PetscFunctionBegin; 761 for (i=0,j=0; i<n; j++) { 762 row = idx[i]; 763 if (row%bs!=0) { /* Not the begining of a block */ 764 sizes[j] = 1; 765 i++; 766 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 767 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 768 i++; 769 } else { /* Begining of the block, so check if the complete block exists */ 770 flg = PETSC_TRUE; 771 for (k=1; k<bs; k++) { 772 if (row+k != idx[i+k]) { /* break in the block */ 773 flg = PETSC_FALSE; 774 break; 775 } 776 } 777 if (flg == PETSC_TRUE) { /* No break in the bs */ 778 sizes[j] = bs; 779 i+= bs; 780 } else { 781 sizes[j] = 1; 782 i++; 783 } 784 } 785 } 786 *bs_max = j; 787 PetscFunctionReturn(0); 788 } 789 790 #undef __FUNCT__ 791 #define __FUNCT__ "MatZeroRows_SeqSBAIJ" 792 PetscErrorCode MatZeroRows_SeqSBAIJ(Mat A,IS is,const PetscScalar *diag) 793 { 794 PetscFunctionBegin; 795 SETERRQ(PETSC_ERR_SUP,"No support for this function yet"); 796 } 797 798 /* Only add/insert a(i,j) with i<=j (blocks). 799 Any a(i,j) with i>j input by user is ingored. 800 */ 801 802 #undef __FUNCT__ 803 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 804 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 805 { 806 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 807 PetscErrorCode ierr; 808 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 809 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 810 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 811 int ridx,cidx,bs2=a->bs2; 812 MatScalar *ap,value,*aa=a->a,*bap; 813 814 PetscFunctionBegin; 815 816 for (k=0; k<m; k++) { /* loop over added rows */ 817 row = im[k]; /* row number */ 818 brow = row/bs; /* block row number */ 819 if (row < 0) continue; 820 #if defined(PETSC_USE_BOPT_g) 821 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1); 822 #endif 823 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 824 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 825 rmax = imax[brow]; /* maximum space allocated for this row */ 826 nrow = ailen[brow]; /* actual length of this row */ 827 low = 0; 828 829 for (l=0; l<n; l++) { /* loop over added columns */ 830 if (in[l] < 0) continue; 831 #if defined(PETSC_USE_BOPT_g) 832 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m-1); 833 #endif 834 col = in[l]; 835 bcol = col/bs; /* block col number */ 836 837 if (brow <= bcol){ 838 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 839 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 840 /* element value a(k,l) */ 841 if (roworiented) { 842 value = v[l + k*n]; 843 } else { 844 value = v[k + l*m]; 845 } 846 847 /* move pointer bap to a(k,l) quickly and add/insert value */ 848 if (!sorted) low = 0; high = nrow; 849 while (high-low > 7) { 850 t = (low+high)/2; 851 if (rp[t] > bcol) high = t; 852 else low = t; 853 } 854 for (i=low; i<high; i++) { 855 /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */ 856 if (rp[i] > bcol) break; 857 if (rp[i] == bcol) { 858 bap = ap + bs2*i + bs*cidx + ridx; 859 if (is == ADD_VALUES) *bap += value; 860 else *bap = value; 861 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 862 if (brow == bcol && ridx < cidx){ 863 bap = ap + bs2*i + bs*ridx + cidx; 864 if (is == ADD_VALUES) *bap += value; 865 else *bap = value; 866 } 867 goto noinsert1; 868 } 869 } 870 871 if (nonew == 1) goto noinsert1; 872 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 873 if (nrow >= rmax) { 874 /* there is no extra room in row, therefore enlarge */ 875 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 876 MatScalar *new_a; 877 878 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 879 880 /* Malloc new storage space */ 881 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 882 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 883 new_j = (int*)(new_a + bs2*new_nz); 884 new_i = new_j + new_nz; 885 886 /* copy over old data into new slots */ 887 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 888 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 889 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 890 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 891 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 892 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 893 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 894 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 895 /* free up old matrix storage */ 896 ierr = PetscFree(a->a);CHKERRQ(ierr); 897 if (!a->singlemalloc) { 898 ierr = PetscFree(a->i);CHKERRQ(ierr); 899 ierr = PetscFree(a->j);CHKERRQ(ierr); 900 } 901 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 902 a->singlemalloc = PETSC_TRUE; 903 904 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 905 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 906 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 907 a->maxnz += bs2*CHUNKSIZE; 908 a->reallocs++; 909 a->nz++; 910 } 911 912 N = nrow++ - 1; 913 /* shift up all the later entries in this row */ 914 for (ii=N; ii>=i; ii--) { 915 rp[ii+1] = rp[ii]; 916 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 917 } 918 if (N>=i) { 919 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 920 } 921 rp[i] = bcol; 922 ap[bs2*i + bs*cidx + ridx] = value; 923 noinsert1:; 924 low = i; 925 /* } */ 926 } 927 } /* end of if .. if.. */ 928 } /* end of loop over added columns */ 929 ailen[brow] = nrow; 930 } /* end of loop over added rows */ 931 932 PetscFunctionReturn(0); 933 } 934 935 EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*); 936 EXTERN PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*); 937 EXTERN PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS[],int); 938 EXTERN PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 939 EXTERN PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat*[]); 940 EXTERN PetscErrorCode MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec); 941 EXTERN PetscErrorCode MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec); 942 EXTERN PetscErrorCode MatScale_SeqSBAIJ(const PetscScalar*,Mat); 943 EXTERN PetscErrorCode MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 944 EXTERN PetscErrorCode MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 945 EXTERN PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat,Vec); 946 EXTERN PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 947 EXTERN PetscErrorCode MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 948 EXTERN PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat); 949 EXTERN PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat,Vec); 950 951 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 952 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 953 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 954 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 955 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 956 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 957 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 958 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 959 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec); 960 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec); 961 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec); 962 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec); 963 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec); 964 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec); 965 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec); 966 967 EXTERN PetscErrorCode MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs); 968 969 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 970 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 971 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 972 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 973 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 974 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 975 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 976 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 977 978 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*); 979 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*); 980 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*); 981 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*); 982 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*); 983 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*); 984 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*); 985 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*); 986 EXTERN PetscErrorCode MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*); 987 988 EXTERN PetscErrorCode MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 989 EXTERN PetscErrorCode MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 990 EXTERN PetscErrorCode MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 991 EXTERN PetscErrorCode MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 992 EXTERN PetscErrorCode MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 993 EXTERN PetscErrorCode MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 994 EXTERN PetscErrorCode MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 995 EXTERN PetscErrorCode MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 996 997 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 998 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 999 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 1000 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 1001 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 1002 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 1003 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 1004 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 1005 1006 #ifdef HAVE_MatICCFactor 1007 /* modified from MatILUFactor_SeqSBAIJ, needs further work! */ 1008 #undef __FUNCT__ 1009 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 1010 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 1011 { 1012 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 1013 Mat outA; 1014 PetscErrorCode ierr; 1015 PetscTruth row_identity,col_identity; 1016 1017 PetscFunctionBegin; 1018 /* 1019 if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1020 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1021 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1022 if (!row_identity || !col_identity) { 1023 SETERRQ(1,"Row and column permutations must be identity for in-place ICC"); 1024 } 1025 */ 1026 1027 outA = inA; 1028 inA->factor = FACTOR_CHOLESKY; 1029 1030 if (!a->diag) { 1031 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 1032 } 1033 /* 1034 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1035 for ILU(0) factorization with natural ordering 1036 */ 1037 switch (a->bs) { 1038 case 1: 1039 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1040 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1041 inA->ops->solves = MatSolves_SeqSBAIJ_1; 1042 PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1043 case 2: 1044 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1045 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1046 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1047 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1048 break; 1049 case 3: 1050 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1051 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1052 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1053 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1054 break; 1055 case 4: 1056 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1057 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1058 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1059 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1060 break; 1061 case 5: 1062 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1063 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1064 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1065 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1066 break; 1067 case 6: 1068 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1069 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1070 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1071 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1072 break; 1073 case 7: 1074 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1075 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1076 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1077 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1078 break; 1079 default: 1080 a->row = row; 1081 a->icol = col; 1082 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1083 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1084 1085 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1086 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1087 PetscLogObjectParent(inA,a->icol); 1088 1089 if (!a->solve_work) { 1090 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1091 PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar)); 1092 } 1093 } 1094 1095 ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr); 1096 1097 PetscFunctionReturn(0); 1098 } 1099 #endif 1100 1101 #undef __FUNCT__ 1102 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1103 PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat A) 1104 { 1105 static PetscTruth called = PETSC_FALSE; 1106 MPI_Comm comm = A->comm; 1107 PetscErrorCode ierr; 1108 1109 PetscFunctionBegin; 1110 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1111 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1112 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1113 PetscFunctionReturn(0); 1114 } 1115 1116 EXTERN_C_BEGIN 1117 #undef __FUNCT__ 1118 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1119 PetscErrorCode MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices) 1120 { 1121 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1122 int i,nz,n; 1123 1124 PetscFunctionBegin; 1125 nz = baij->maxnz; 1126 n = mat->n; 1127 for (i=0; i<nz; i++) { 1128 baij->j[i] = indices[i]; 1129 } 1130 baij->nz = nz; 1131 for (i=0; i<n; i++) { 1132 baij->ilen[i] = baij->imax[i]; 1133 } 1134 1135 PetscFunctionReturn(0); 1136 } 1137 EXTERN_C_END 1138 1139 #undef __FUNCT__ 1140 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1141 /*@ 1142 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1143 in the matrix. 1144 1145 Input Parameters: 1146 + mat - the SeqSBAIJ matrix 1147 - indices - the column indices 1148 1149 Level: advanced 1150 1151 Notes: 1152 This can be called if you have precomputed the nonzero structure of the 1153 matrix and want to provide it to the matrix object to improve the performance 1154 of the MatSetValues() operation. 1155 1156 You MUST have set the correct numbers of nonzeros per row in the call to 1157 MatCreateSeqSBAIJ(). 1158 1159 MUST be called before any calls to MatSetValues() 1160 1161 .seealso: MatCreateSeqSBAIJ 1162 @*/ 1163 PetscErrorCode MatSeqSBAIJSetColumnIndices(Mat mat,int *indices) 1164 { 1165 PetscErrorCode ierr,(*f)(Mat,int *); 1166 1167 PetscFunctionBegin; 1168 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1169 PetscValidPointer(indices,2); 1170 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1171 if (f) { 1172 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1173 } else { 1174 SETERRQ(1,"Wrong type of matrix to set column indices"); 1175 } 1176 PetscFunctionReturn(0); 1177 } 1178 1179 #undef __FUNCT__ 1180 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1181 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A) 1182 { 1183 PetscErrorCode ierr; 1184 1185 PetscFunctionBegin; 1186 ierr = MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1187 PetscFunctionReturn(0); 1188 } 1189 1190 #undef __FUNCT__ 1191 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1192 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1193 { 1194 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1195 PetscFunctionBegin; 1196 *array = a->a; 1197 PetscFunctionReturn(0); 1198 } 1199 1200 #undef __FUNCT__ 1201 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1202 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1203 { 1204 PetscFunctionBegin; 1205 PetscFunctionReturn(0); 1206 } 1207 1208 #include "petscblaslapack.h" 1209 #undef __FUNCT__ 1210 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1211 PetscErrorCode MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1212 { 1213 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1214 PetscErrorCode ierr; 1215 int i,bs=y->bs,bs2,j; 1216 PetscBLASInt bnz = (PetscBLASInt)x->nz,one = 1; 1217 1218 PetscFunctionBegin; 1219 if (str == SAME_NONZERO_PATTERN) { 1220 BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1221 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1222 if (y->xtoy && y->XtoY != X) { 1223 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1224 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1225 } 1226 if (!y->xtoy) { /* get xtoy */ 1227 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1228 y->XtoY = X; 1229 } 1230 bs2 = bs*bs; 1231 for (i=0; i<x->nz; i++) { 1232 j = 0; 1233 while (j < bs2){ 1234 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1235 j++; 1236 } 1237 } 1238 PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)); 1239 } else { 1240 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1241 } 1242 PetscFunctionReturn(0); 1243 } 1244 1245 #undef __FUNCT__ 1246 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1247 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1248 { 1249 PetscFunctionBegin; 1250 *flg = PETSC_TRUE; 1251 PetscFunctionReturn(0); 1252 } 1253 1254 #undef __FUNCT__ 1255 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1256 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1257 { 1258 PetscFunctionBegin; 1259 *flg = PETSC_TRUE; 1260 PetscFunctionReturn(0); 1261 } 1262 1263 #undef __FUNCT__ 1264 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1265 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg) 1266 { 1267 PetscFunctionBegin; 1268 *flg = PETSC_FALSE; 1269 PetscFunctionReturn(0); 1270 } 1271 1272 /* -------------------------------------------------------------------*/ 1273 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1274 MatGetRow_SeqSBAIJ, 1275 MatRestoreRow_SeqSBAIJ, 1276 MatMult_SeqSBAIJ_N, 1277 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1278 MatMultTranspose_SeqSBAIJ, 1279 MatMultTransposeAdd_SeqSBAIJ, 1280 MatSolve_SeqSBAIJ_N, 1281 0, 1282 0, 1283 /*10*/ 0, 1284 0, 1285 MatCholeskyFactor_SeqSBAIJ, 1286 MatRelax_SeqSBAIJ, 1287 MatTranspose_SeqSBAIJ, 1288 /*15*/ MatGetInfo_SeqSBAIJ, 1289 MatEqual_SeqSBAIJ, 1290 MatGetDiagonal_SeqSBAIJ, 1291 MatDiagonalScale_SeqSBAIJ, 1292 MatNorm_SeqSBAIJ, 1293 /*20*/ 0, 1294 MatAssemblyEnd_SeqSBAIJ, 1295 0, 1296 MatSetOption_SeqSBAIJ, 1297 MatZeroEntries_SeqSBAIJ, 1298 /*25*/ MatZeroRows_SeqSBAIJ, 1299 0, 1300 0, 1301 MatCholeskyFactorSymbolic_SeqSBAIJ, 1302 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1303 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1304 0, 1305 MatICCFactorSymbolic_SeqSBAIJ, 1306 MatGetArray_SeqSBAIJ, 1307 MatRestoreArray_SeqSBAIJ, 1308 /*35*/ MatDuplicate_SeqSBAIJ, 1309 0, 1310 0, 1311 0, 1312 0, 1313 /*40*/ MatAXPY_SeqSBAIJ, 1314 MatGetSubMatrices_SeqSBAIJ, 1315 MatIncreaseOverlap_SeqSBAIJ, 1316 MatGetValues_SeqSBAIJ, 1317 0, 1318 /*45*/ MatPrintHelp_SeqSBAIJ, 1319 MatScale_SeqSBAIJ, 1320 0, 1321 0, 1322 0, 1323 /*50*/ MatGetBlockSize_SeqSBAIJ, 1324 MatGetRowIJ_SeqSBAIJ, 1325 MatRestoreRowIJ_SeqSBAIJ, 1326 0, 1327 0, 1328 /*55*/ 0, 1329 0, 1330 0, 1331 0, 1332 MatSetValuesBlocked_SeqSBAIJ, 1333 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1334 0, 1335 0, 1336 MatGetPetscMaps_Petsc, 1337 0, 1338 /*65*/ 0, 1339 0, 1340 0, 1341 0, 1342 0, 1343 /*70*/ MatGetRowMax_SeqSBAIJ, 1344 0, 1345 0, 1346 0, 1347 0, 1348 /*75*/ 0, 1349 0, 1350 0, 1351 0, 1352 0, 1353 /*80*/ 0, 1354 0, 1355 0, 1356 #if !defined(PETSC_USE_COMPLEX) 1357 MatGetInertia_SeqSBAIJ, 1358 #else 1359 0, 1360 #endif 1361 /*84*/ MatLoad_SeqSBAIJ, 1362 MatIsSymmetric_SeqSBAIJ, 1363 MatIsStructurallySymmetric_SeqSBAIJ, 1364 MatIsHermitian_SeqSBAIJ 1365 }; 1366 1367 EXTERN_C_BEGIN 1368 #undef __FUNCT__ 1369 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1370 PetscErrorCode MatStoreValues_SeqSBAIJ(Mat mat) 1371 { 1372 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1373 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1374 PetscErrorCode ierr; 1375 1376 PetscFunctionBegin; 1377 if (aij->nonew != 1) { 1378 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1379 } 1380 1381 /* allocate space for values if not already there */ 1382 if (!aij->saved_values) { 1383 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1384 } 1385 1386 /* copy values over */ 1387 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1388 PetscFunctionReturn(0); 1389 } 1390 EXTERN_C_END 1391 1392 EXTERN_C_BEGIN 1393 #undef __FUNCT__ 1394 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1395 PetscErrorCode MatRetrieveValues_SeqSBAIJ(Mat mat) 1396 { 1397 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1398 PetscErrorCode ierr; 1399 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1400 1401 PetscFunctionBegin; 1402 if (aij->nonew != 1) { 1403 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1404 } 1405 if (!aij->saved_values) { 1406 SETERRQ(1,"Must call MatStoreValues(A);first"); 1407 } 1408 1409 /* copy values over */ 1410 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1411 PetscFunctionReturn(0); 1412 } 1413 EXTERN_C_END 1414 1415 EXTERN_C_BEGIN 1416 #undef __FUNCT__ 1417 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1418 PetscErrorCode MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,int bs,int nz,int *nnz) 1419 { 1420 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1421 PetscErrorCode ierr; 1422 int i,len,mbs,bs2; 1423 PetscTruth flg; 1424 1425 PetscFunctionBegin; 1426 B->preallocated = PETSC_TRUE; 1427 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1428 mbs = B->m/bs; 1429 bs2 = bs*bs; 1430 1431 if (mbs*bs != B->m) { 1432 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1433 } 1434 1435 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1436 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1437 if (nnz) { 1438 for (i=0; i<mbs; i++) { 1439 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1440 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs); 1441 } 1442 } 1443 1444 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1445 if (!flg) { 1446 switch (bs) { 1447 case 1: 1448 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1449 B->ops->solve = MatSolve_SeqSBAIJ_1; 1450 B->ops->solves = MatSolves_SeqSBAIJ_1; 1451 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_1; 1452 B->ops->mult = MatMult_SeqSBAIJ_1; 1453 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1454 break; 1455 case 2: 1456 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1457 B->ops->solve = MatSolve_SeqSBAIJ_2; 1458 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_2; 1459 B->ops->mult = MatMult_SeqSBAIJ_2; 1460 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1461 break; 1462 case 3: 1463 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1464 B->ops->solve = MatSolve_SeqSBAIJ_3; 1465 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_3; 1466 B->ops->mult = MatMult_SeqSBAIJ_3; 1467 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1468 break; 1469 case 4: 1470 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1471 B->ops->solve = MatSolve_SeqSBAIJ_4; 1472 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_4; 1473 B->ops->mult = MatMult_SeqSBAIJ_4; 1474 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1475 break; 1476 case 5: 1477 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1478 B->ops->solve = MatSolve_SeqSBAIJ_5; 1479 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_5; 1480 B->ops->mult = MatMult_SeqSBAIJ_5; 1481 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1482 break; 1483 case 6: 1484 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1485 B->ops->solve = MatSolve_SeqSBAIJ_6; 1486 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_6; 1487 B->ops->mult = MatMult_SeqSBAIJ_6; 1488 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1489 break; 1490 case 7: 1491 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1492 B->ops->solve = MatSolve_SeqSBAIJ_7; 1493 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_7; 1494 B->ops->mult = MatMult_SeqSBAIJ_7; 1495 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1496 break; 1497 } 1498 } 1499 1500 b->mbs = mbs; 1501 b->nbs = mbs; 1502 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1503 if (!nnz) { 1504 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1505 else if (nz <= 0) nz = 1; 1506 for (i=0; i<mbs; i++) { 1507 b->imax[i] = nz; 1508 } 1509 nz = nz*mbs; /* total nz */ 1510 } else { 1511 nz = 0; 1512 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1513 } 1514 /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1515 1516 /* allocate the matrix space */ 1517 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1518 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1519 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1520 b->j = (int*)(b->a + nz*bs2); 1521 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 1522 b->i = b->j + nz; 1523 b->singlemalloc = PETSC_TRUE; 1524 1525 /* pointer to beginning of each row */ 1526 b->i[0] = 0; 1527 for (i=1; i<mbs+1; i++) { 1528 b->i[i] = b->i[i-1] + b->imax[i-1]; 1529 } 1530 1531 /* b->ilen will count nonzeros in each block row so far. */ 1532 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1533 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1534 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1535 1536 b->bs = bs; 1537 b->bs2 = bs2; 1538 b->nz = 0; 1539 b->maxnz = nz*bs2; 1540 1541 b->inew = 0; 1542 b->jnew = 0; 1543 b->anew = 0; 1544 b->a2anew = 0; 1545 b->permute = PETSC_FALSE; 1546 PetscFunctionReturn(0); 1547 } 1548 EXTERN_C_END 1549 1550 EXTERN_C_BEGIN 1551 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqAIJ(Mat,const MatType,Mat*); 1552 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqBAIJ(Mat,const MatType,Mat*); 1553 EXTERN_C_END 1554 1555 /*MC 1556 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1557 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1558 1559 Options Database Keys: 1560 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1561 1562 Level: beginner 1563 1564 .seealso: MatCreateSeqSBAIJ 1565 M*/ 1566 1567 EXTERN_C_BEGIN 1568 #undef __FUNCT__ 1569 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1570 PetscErrorCode MatCreate_SeqSBAIJ(Mat B) 1571 { 1572 Mat_SeqSBAIJ *b; 1573 PetscErrorCode ierr; 1574 int size; 1575 1576 PetscFunctionBegin; 1577 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1578 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1579 B->m = B->M = PetscMax(B->m,B->M); 1580 B->n = B->N = PetscMax(B->n,B->N); 1581 1582 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1583 B->data = (void*)b; 1584 ierr = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1585 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1586 B->ops->destroy = MatDestroy_SeqSBAIJ; 1587 B->ops->view = MatView_SeqSBAIJ; 1588 B->factor = 0; 1589 B->lupivotthreshold = 1.0; 1590 B->mapping = 0; 1591 b->row = 0; 1592 b->icol = 0; 1593 b->reallocs = 0; 1594 b->saved_values = 0; 1595 1596 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1597 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1598 1599 b->sorted = PETSC_FALSE; 1600 b->roworiented = PETSC_TRUE; 1601 b->nonew = 0; 1602 b->diag = 0; 1603 b->solve_work = 0; 1604 b->mult_work = 0; 1605 B->spptr = 0; 1606 b->keepzeroedrows = PETSC_FALSE; 1607 b->xtoy = 0; 1608 b->XtoY = 0; 1609 1610 b->inew = 0; 1611 b->jnew = 0; 1612 b->anew = 0; 1613 b->a2anew = 0; 1614 b->permute = PETSC_FALSE; 1615 1616 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1617 "MatStoreValues_SeqSBAIJ", 1618 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1619 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1620 "MatRetrieveValues_SeqSBAIJ", 1621 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1622 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1623 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1624 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1625 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C", 1626 "MatConvert_SeqSBAIJ_SeqAIJ", 1627 MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr); 1628 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C", 1629 "MatConvert_SeqSBAIJ_SeqBAIJ", 1630 MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr); 1631 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1632 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1633 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1634 1635 B->symmetric = PETSC_TRUE; 1636 B->structurally_symmetric = PETSC_TRUE; 1637 B->symmetric_set = PETSC_TRUE; 1638 B->structurally_symmetric_set = PETSC_TRUE; 1639 PetscFunctionReturn(0); 1640 } 1641 EXTERN_C_END 1642 1643 #undef __FUNCT__ 1644 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1645 /*@C 1646 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1647 compressed row) format. For good matrix assembly performance the 1648 user should preallocate the matrix storage by setting the parameter nz 1649 (or the array nnz). By setting these parameters accurately, performance 1650 during matrix assembly can be increased by more than a factor of 50. 1651 1652 Collective on Mat 1653 1654 Input Parameters: 1655 + A - the symmetric matrix 1656 . bs - size of block 1657 . nz - number of block nonzeros per block row (same for all rows) 1658 - nnz - array containing the number of block nonzeros in the upper triangular plus 1659 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1660 1661 Options Database Keys: 1662 . -mat_no_unroll - uses code that does not unroll the loops in the 1663 block calculations (much slower) 1664 . -mat_block_size - size of the blocks to use 1665 1666 Level: intermediate 1667 1668 Notes: 1669 Specify the preallocated storage with either nz or nnz (not both). 1670 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1671 allocation. For additional details, see the users manual chapter on 1672 matrices. 1673 1674 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1675 @*/ 1676 PetscErrorCode MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) { 1677 PetscErrorCode ierr,(*f)(Mat,int,int,const int[]); 1678 1679 PetscFunctionBegin; 1680 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1681 if (f) { 1682 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1683 } 1684 PetscFunctionReturn(0); 1685 } 1686 1687 #undef __FUNCT__ 1688 #define __FUNCT__ "MatCreateSeqSBAIJ" 1689 /*@C 1690 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1691 compressed row) format. For good matrix assembly performance the 1692 user should preallocate the matrix storage by setting the parameter nz 1693 (or the array nnz). By setting these parameters accurately, performance 1694 during matrix assembly can be increased by more than a factor of 50. 1695 1696 Collective on MPI_Comm 1697 1698 Input Parameters: 1699 + comm - MPI communicator, set to PETSC_COMM_SELF 1700 . bs - size of block 1701 . m - number of rows, or number of columns 1702 . nz - number of block nonzeros per block row (same for all rows) 1703 - nnz - array containing the number of block nonzeros in the upper triangular plus 1704 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1705 1706 Output Parameter: 1707 . A - the symmetric matrix 1708 1709 Options Database Keys: 1710 . -mat_no_unroll - uses code that does not unroll the loops in the 1711 block calculations (much slower) 1712 . -mat_block_size - size of the blocks to use 1713 1714 Level: intermediate 1715 1716 Notes: 1717 1718 Specify the preallocated storage with either nz or nnz (not both). 1719 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1720 allocation. For additional details, see the users manual chapter on 1721 matrices. 1722 1723 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1724 @*/ 1725 PetscErrorCode MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A) 1726 { 1727 PetscErrorCode ierr; 1728 1729 PetscFunctionBegin; 1730 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1731 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1732 ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1733 PetscFunctionReturn(0); 1734 } 1735 1736 #undef __FUNCT__ 1737 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1738 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1739 { 1740 Mat C; 1741 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1742 PetscErrorCode ierr; 1743 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1744 1745 PetscFunctionBegin; 1746 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1747 1748 *B = 0; 1749 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1750 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1751 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1752 c = (Mat_SeqSBAIJ*)C->data; 1753 1754 C->preallocated = PETSC_TRUE; 1755 C->factor = A->factor; 1756 c->row = 0; 1757 c->icol = 0; 1758 c->saved_values = 0; 1759 c->keepzeroedrows = a->keepzeroedrows; 1760 C->assembled = PETSC_TRUE; 1761 1762 C->M = A->M; 1763 C->N = A->N; 1764 c->bs = a->bs; 1765 c->bs2 = a->bs2; 1766 c->mbs = a->mbs; 1767 c->nbs = a->nbs; 1768 1769 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1770 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1771 for (i=0; i<mbs; i++) { 1772 c->imax[i] = a->imax[i]; 1773 c->ilen[i] = a->ilen[i]; 1774 } 1775 1776 /* allocate the matrix space */ 1777 c->singlemalloc = PETSC_TRUE; 1778 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1779 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1780 c->j = (int*)(c->a + nz*bs2); 1781 c->i = c->j + nz; 1782 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1783 if (mbs > 0) { 1784 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1785 if (cpvalues == MAT_COPY_VALUES) { 1786 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1787 } else { 1788 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1789 } 1790 } 1791 1792 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1793 c->sorted = a->sorted; 1794 c->roworiented = a->roworiented; 1795 c->nonew = a->nonew; 1796 1797 if (a->diag) { 1798 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1799 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1800 for (i=0; i<mbs; i++) { 1801 c->diag[i] = a->diag[i]; 1802 } 1803 } else c->diag = 0; 1804 c->nz = a->nz; 1805 c->maxnz = a->maxnz; 1806 c->solve_work = 0; 1807 c->mult_work = 0; 1808 *B = C; 1809 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1810 PetscFunctionReturn(0); 1811 } 1812 1813 #undef __FUNCT__ 1814 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1815 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer,const MatType type,Mat *A) 1816 { 1817 Mat_SeqSBAIJ *a; 1818 Mat B; 1819 PetscErrorCode ierr; 1820 int i,nz,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1821 int *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1822 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1823 int *masked,nmask,tmp,bs2,ishift; 1824 PetscScalar *aa; 1825 MPI_Comm comm = ((PetscObject)viewer)->comm; 1826 1827 PetscFunctionBegin; 1828 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1829 bs2 = bs*bs; 1830 1831 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1832 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1833 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1834 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1835 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1836 M = header[1]; N = header[2]; nz = header[3]; 1837 1838 if (header[3] < 0) { 1839 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1840 } 1841 1842 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1843 1844 /* 1845 This code adds extra rows to make sure the number of rows is 1846 divisible by the blocksize 1847 */ 1848 mbs = M/bs; 1849 extra_rows = bs - M + bs*(mbs); 1850 if (extra_rows == bs) extra_rows = 0; 1851 else mbs++; 1852 if (extra_rows) { 1853 PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"); 1854 } 1855 1856 /* read in row lengths */ 1857 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1858 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1859 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1860 1861 /* read in column indices */ 1862 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1863 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1864 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1865 1866 /* loop over row lengths determining block row lengths */ 1867 ierr = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr); 1868 ierr = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1869 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1870 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1871 masked = mask + mbs; 1872 rowcount = 0; nzcount = 0; 1873 for (i=0; i<mbs; i++) { 1874 nmask = 0; 1875 for (j=0; j<bs; j++) { 1876 kmax = rowlengths[rowcount]; 1877 for (k=0; k<kmax; k++) { 1878 tmp = jj[nzcount++]/bs; /* block col. index */ 1879 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1880 } 1881 rowcount++; 1882 } 1883 s_browlengths[i] += nmask; 1884 1885 /* zero out the mask elements we set */ 1886 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1887 } 1888 1889 /* create our matrix */ 1890 ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr); 1891 ierr = MatSetType(B,type);CHKERRQ(ierr); 1892 ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr); 1893 a = (Mat_SeqSBAIJ*)B->data; 1894 1895 /* set matrix "i" values */ 1896 a->i[0] = 0; 1897 for (i=1; i<= mbs; i++) { 1898 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1899 a->ilen[i-1] = s_browlengths[i-1]; 1900 } 1901 a->nz = a->i[mbs]; 1902 1903 /* read in nonzero values */ 1904 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1905 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1906 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1907 1908 /* set "a" and "j" values into matrix */ 1909 nzcount = 0; jcount = 0; 1910 for (i=0; i<mbs; i++) { 1911 nzcountb = nzcount; 1912 nmask = 0; 1913 for (j=0; j<bs; j++) { 1914 kmax = rowlengths[i*bs+j]; 1915 for (k=0; k<kmax; k++) { 1916 tmp = jj[nzcount++]/bs; /* block col. index */ 1917 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1918 } 1919 } 1920 /* sort the masked values */ 1921 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1922 1923 /* set "j" values into matrix */ 1924 maskcount = 1; 1925 for (j=0; j<nmask; j++) { 1926 a->j[jcount++] = masked[j]; 1927 mask[masked[j]] = maskcount++; 1928 } 1929 1930 /* set "a" values into matrix */ 1931 ishift = bs2*a->i[i]; 1932 for (j=0; j<bs; j++) { 1933 kmax = rowlengths[i*bs+j]; 1934 for (k=0; k<kmax; k++) { 1935 tmp = jj[nzcountb]/bs ; /* block col. index */ 1936 if (tmp >= i){ 1937 block = mask[tmp] - 1; 1938 point = jj[nzcountb] - bs*tmp; 1939 idx = ishift + bs2*block + j + bs*point; 1940 a->a[idx] = aa[nzcountb]; 1941 } 1942 nzcountb++; 1943 } 1944 } 1945 /* zero out the mask elements we set */ 1946 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1947 } 1948 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1949 1950 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1951 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1952 ierr = PetscFree(aa);CHKERRQ(ierr); 1953 ierr = PetscFree(jj);CHKERRQ(ierr); 1954 ierr = PetscFree(mask);CHKERRQ(ierr); 1955 1956 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1957 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1958 ierr = MatView_Private(B);CHKERRQ(ierr); 1959 *A = B; 1960 PetscFunctionReturn(0); 1961 } 1962 1963 #undef __FUNCT__ 1964 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1965 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1966 { 1967 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1968 MatScalar *aa=a->a,*v,*v1; 1969 PetscScalar *x,*b,*t,sum,d; 1970 PetscErrorCode ierr; 1971 int m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j; 1972 int nz,nz1,*vj,*vj1,i; 1973 1974 PetscFunctionBegin; 1975 its = its*lits; 1976 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1977 1978 if (bs > 1) 1979 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1980 1981 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1982 if (xx != bb) { 1983 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1984 } else { 1985 b = x; 1986 } 1987 1988 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1989 1990 if (flag & SOR_ZERO_INITIAL_GUESS) { 1991 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1992 for (i=0; i<m; i++) 1993 t[i] = b[i]; 1994 1995 for (i=0; i<m; i++){ 1996 d = *(aa + ai[i]); /* diag[i] */ 1997 v = aa + ai[i] + 1; 1998 vj = aj + ai[i] + 1; 1999 nz = ai[i+1] - ai[i] - 1; 2000 x[i] = omega*t[i]/d; 2001 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 2002 PetscLogFlops(2*nz-1); 2003 } 2004 } 2005 2006 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2007 for (i=0; i<m; i++) 2008 t[i] = b[i]; 2009 2010 for (i=0; i<m-1; i++){ /* update rhs */ 2011 v = aa + ai[i] + 1; 2012 vj = aj + ai[i] + 1; 2013 nz = ai[i+1] - ai[i] - 1; 2014 while (nz--) t[*vj++] -= x[i]*(*v++); 2015 PetscLogFlops(2*nz-1); 2016 } 2017 for (i=m-1; i>=0; i--){ 2018 d = *(aa + ai[i]); 2019 v = aa + ai[i] + 1; 2020 vj = aj + ai[i] + 1; 2021 nz = ai[i+1] - ai[i] - 1; 2022 sum = t[i]; 2023 while (nz--) sum -= x[*vj++]*(*v++); 2024 PetscLogFlops(2*nz-1); 2025 x[i] = (1-omega)*x[i] + omega*sum/d; 2026 } 2027 } 2028 its--; 2029 } 2030 2031 while (its--) { 2032 /* 2033 forward sweep: 2034 for i=0,...,m-1: 2035 sum[i] = (b[i] - U(i,:)x )/d[i]; 2036 x[i] = (1-omega)x[i] + omega*sum[i]; 2037 b = b - x[i]*U^T(i,:); 2038 2039 */ 2040 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2041 for (i=0; i<m; i++) 2042 t[i] = b[i]; 2043 2044 for (i=0; i<m; i++){ 2045 d = *(aa + ai[i]); /* diag[i] */ 2046 v = aa + ai[i] + 1; v1=v; 2047 vj = aj + ai[i] + 1; vj1=vj; 2048 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2049 sum = t[i]; 2050 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2051 x[i] = (1-omega)*x[i] + omega*sum/d; 2052 while (nz--) t[*vj++] -= x[i]*(*v++); 2053 PetscLogFlops(4*nz-2); 2054 } 2055 } 2056 2057 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2058 /* 2059 backward sweep: 2060 b = b - x[i]*U^T(i,:), i=0,...,n-2 2061 for i=m-1,...,0: 2062 sum[i] = (b[i] - U(i,:)x )/d[i]; 2063 x[i] = (1-omega)x[i] + omega*sum[i]; 2064 */ 2065 for (i=0; i<m; i++) 2066 t[i] = b[i]; 2067 2068 for (i=0; i<m-1; i++){ /* update rhs */ 2069 v = aa + ai[i] + 1; 2070 vj = aj + ai[i] + 1; 2071 nz = ai[i+1] - ai[i] - 1; 2072 while (nz--) t[*vj++] -= x[i]*(*v++); 2073 PetscLogFlops(2*nz-1); 2074 } 2075 for (i=m-1; i>=0; i--){ 2076 d = *(aa + ai[i]); 2077 v = aa + ai[i] + 1; 2078 vj = aj + ai[i] + 1; 2079 nz = ai[i+1] - ai[i] - 1; 2080 sum = t[i]; 2081 while (nz--) sum -= x[*vj++]*(*v++); 2082 PetscLogFlops(2*nz-1); 2083 x[i] = (1-omega)*x[i] + omega*sum/d; 2084 } 2085 } 2086 } 2087 2088 ierr = PetscFree(t);CHKERRQ(ierr); 2089 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2090 if (bb != xx) { 2091 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2092 } 2093 2094 PetscFunctionReturn(0); 2095 } 2096 2097 2098 2099 2100 2101 2102