1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the SBAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/inline/spops.h" 9 #include "src/mat/impls/sbaij/seq/sbaij.h" 10 11 #define CHUNKSIZE 10 12 13 /* 14 Checks for missing diagonals 15 */ 16 #undef __FUNCT__ 17 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 18 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A) 19 { 20 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 21 PetscErrorCode ierr; 22 PetscInt *diag,*jj = a->j,i; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 for (i=0; i<a->mbs; i++) { 28 if (jj[diag[i]] != i) SETERRQ1(PETSC_ERR_ARG_CORRUPT,"Matrix is missing diagonal number %D",i); 29 } 30 PetscFunctionReturn(0); 31 } 32 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 35 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A) 36 { 37 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 38 PetscErrorCode ierr; 39 PetscInt i,mbs = a->mbs; 40 41 PetscFunctionBegin; 42 if (a->diag) PetscFunctionReturn(0); 43 44 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 45 ierr = PetscLogObjectMemory(A,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 46 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 47 PetscFunctionReturn(0); 48 } 49 50 #undef __FUNCT__ 51 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 52 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 53 { 54 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 55 PetscInt n = a->mbs,i; 56 57 PetscFunctionBegin; 58 *nn = n; 59 if (!ia) PetscFunctionReturn(0); 60 61 if (oshift == 1) { 62 /* temporarily add 1 to i and j indices */ 63 PetscInt nz = a->i[n]; 64 for (i=0; i<nz; i++) a->j[i]++; 65 for (i=0; i<n+1; i++) a->i[i]++; 66 *ia = a->i; *ja = a->j; 67 } else { 68 *ia = a->i; *ja = a->j; 69 } 70 PetscFunctionReturn(0); 71 } 72 73 #undef __FUNCT__ 74 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 75 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 76 { 77 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 78 PetscInt i,n = a->mbs; 79 80 PetscFunctionBegin; 81 if (!ia) PetscFunctionReturn(0); 82 83 if (oshift == 1) { 84 PetscInt nz = a->i[n]-1; 85 for (i=0; i<nz; i++) a->j[i]--; 86 for (i=0; i<n+1; i++) a->i[i]--; 87 } 88 PetscFunctionReturn(0); 89 } 90 91 #undef __FUNCT__ 92 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 93 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A) 94 { 95 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 96 PetscErrorCode ierr; 97 98 PetscFunctionBegin; 99 PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->m,a->nz); 100 ierr = MatSeqXAIJFreeAIJ(a->singlemalloc,&a->a,&a->j,&a->i);CHKERRQ(ierr); 101 if (a->row) { 102 ierr = ISDestroy(a->row);CHKERRQ(ierr); 103 } 104 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 105 if (a->imax) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 106 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 107 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 108 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 109 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 110 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 111 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 112 113 if (a->inew){ 114 ierr = PetscFree(a->inew);CHKERRQ(ierr); 115 a->inew = 0; 116 } 117 ierr = PetscFree(a);CHKERRQ(ierr); 118 119 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 120 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 121 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 122 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 123 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr); 124 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 125 PetscFunctionReturn(0); 126 } 127 128 #undef __FUNCT__ 129 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 130 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op) 131 { 132 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 133 PetscErrorCode ierr; 134 135 PetscFunctionBegin; 136 switch (op) { 137 case MAT_ROW_ORIENTED: 138 a->roworiented = PETSC_TRUE; 139 break; 140 case MAT_COLUMN_ORIENTED: 141 a->roworiented = PETSC_FALSE; 142 break; 143 case MAT_COLUMNS_SORTED: 144 a->sorted = PETSC_TRUE; 145 break; 146 case MAT_COLUMNS_UNSORTED: 147 a->sorted = PETSC_FALSE; 148 break; 149 case MAT_KEEP_ZEROED_ROWS: 150 a->keepzeroedrows = PETSC_TRUE; 151 break; 152 case MAT_NO_NEW_NONZERO_LOCATIONS: 153 a->nonew = 1; 154 break; 155 case MAT_NEW_NONZERO_LOCATION_ERR: 156 a->nonew = -1; 157 break; 158 case MAT_NEW_NONZERO_ALLOCATION_ERR: 159 a->nonew = -2; 160 break; 161 case MAT_YES_NEW_NONZERO_LOCATIONS: 162 a->nonew = 0; 163 break; 164 case MAT_ROWS_SORTED: 165 case MAT_ROWS_UNSORTED: 166 case MAT_YES_NEW_DIAGONALS: 167 case MAT_IGNORE_OFF_PROC_ENTRIES: 168 case MAT_USE_HASH_TABLE: 169 ierr = PetscLogInfo((A,"MatSetOption_SeqSBAIJ:Option ignored\n"));CHKERRQ(ierr); 170 break; 171 case MAT_NO_NEW_DIAGONALS: 172 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 173 case MAT_NOT_SYMMETRIC: 174 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 175 case MAT_HERMITIAN: 176 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 177 case MAT_SYMMETRIC: 178 case MAT_STRUCTURALLY_SYMMETRIC: 179 case MAT_NOT_HERMITIAN: 180 case MAT_SYMMETRY_ETERNAL: 181 case MAT_NOT_SYMMETRY_ETERNAL: 182 case MAT_IGNORE_LOWER_TRIANGULAR: 183 a->ignore_ltriangular = PETSC_TRUE; 184 break; 185 case MAT_ERROR_LOWER_TRIANGULAR: 186 a->ignore_ltriangular = PETSC_FALSE; 187 break; 188 default: 189 SETERRQ(PETSC_ERR_SUP,"unknown option"); 190 } 191 PetscFunctionReturn(0); 192 } 193 194 #undef __FUNCT__ 195 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 196 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v) 197 { 198 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 199 PetscErrorCode ierr; 200 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2; 201 MatScalar *aa,*aa_i; 202 PetscScalar *v_i; 203 204 PetscFunctionBegin; 205 bs = A->bs; 206 ai = a->i; 207 aj = a->j; 208 aa = a->a; 209 bs2 = a->bs2; 210 211 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row); 212 213 bn = row/bs; /* Block number */ 214 bp = row % bs; /* Block position */ 215 M = ai[bn+1] - ai[bn]; 216 *ncols = bs*M; 217 218 if (v) { 219 *v = 0; 220 if (*ncols) { 221 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 222 for (i=0; i<M; i++) { /* for each block in the block row */ 223 v_i = *v + i*bs; 224 aa_i = aa + bs2*(ai[bn] + i); 225 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 226 } 227 } 228 } 229 230 if (cols) { 231 *cols = 0; 232 if (*ncols) { 233 ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr); 234 for (i=0; i<M; i++) { /* for each block in the block row */ 235 cols_i = *cols + i*bs; 236 itmp = bs*aj[ai[bn] + i]; 237 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 238 } 239 } 240 } 241 242 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 243 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 244 #ifdef column_search 245 v_i = *v + M*bs; 246 cols_i = *cols + M*bs; 247 for (i=0; i<bn; i++){ /* for each block row */ 248 M = ai[i+1] - ai[i]; 249 for (j=0; j<M; j++){ 250 itmp = aj[ai[i] + j]; /* block column value */ 251 if (itmp == bn){ 252 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 253 for (k=0; k<bs; k++) { 254 *cols_i++ = i*bs+k; 255 *v_i++ = aa_i[k]; 256 } 257 *ncols += bs; 258 break; 259 } 260 } 261 } 262 #endif 263 PetscFunctionReturn(0); 264 } 265 266 #undef __FUNCT__ 267 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 268 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 269 { 270 PetscErrorCode ierr; 271 272 PetscFunctionBegin; 273 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 274 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 275 PetscFunctionReturn(0); 276 } 277 278 #undef __FUNCT__ 279 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 280 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B) 281 { 282 PetscErrorCode ierr; 283 PetscFunctionBegin; 284 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 285 PetscFunctionReturn(0); 286 } 287 288 #undef __FUNCT__ 289 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 290 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 291 { 292 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 293 PetscErrorCode ierr; 294 PetscInt i,j,bs = A->bs,k,l,bs2=a->bs2; 295 const char *name; 296 PetscViewerFormat format; 297 298 PetscFunctionBegin; 299 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 300 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 301 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 302 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 303 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 304 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 305 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 306 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 307 for (i=0; i<a->mbs; i++) { 308 for (j=0; j<bs; j++) { 309 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 310 for (k=a->i[i]; k<a->i[i+1]; k++) { 311 for (l=0; l<bs; l++) { 312 #if defined(PETSC_USE_COMPLEX) 313 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 314 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 315 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 316 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 317 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 318 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 319 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 320 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 321 } 322 #else 323 if (a->a[bs2*k + l*bs + j] != 0.0) { 324 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 325 } 326 #endif 327 } 328 } 329 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 330 } 331 } 332 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 333 } else { 334 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 335 for (i=0; i<a->mbs; i++) { 336 for (j=0; j<bs; j++) { 337 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 338 for (k=a->i[i]; k<a->i[i+1]; k++) { 339 for (l=0; l<bs; l++) { 340 #if defined(PETSC_USE_COMPLEX) 341 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 342 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 343 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 344 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 345 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 346 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 347 } else { 348 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 349 } 350 #else 351 ierr = PetscViewerASCIIPrintf(viewer," %D %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 352 #endif 353 } 354 } 355 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 356 } 357 } 358 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 359 } 360 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 361 PetscFunctionReturn(0); 362 } 363 364 #undef __FUNCT__ 365 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 366 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 367 { 368 Mat A = (Mat) Aa; 369 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 370 PetscErrorCode ierr; 371 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2; 372 PetscMPIInt rank; 373 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 374 MatScalar *aa; 375 MPI_Comm comm; 376 PetscViewer viewer; 377 378 PetscFunctionBegin; 379 /* 380 This is nasty. If this is called from an originally parallel matrix 381 then all processes call this,but only the first has the matrix so the 382 rest should return immediately. 383 */ 384 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 385 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 386 if (rank) PetscFunctionReturn(0); 387 388 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 389 390 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 391 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 392 393 /* loop over matrix elements drawing boxes */ 394 color = PETSC_DRAW_BLUE; 395 for (i=0,row=0; i<mbs; i++,row+=bs) { 396 for (j=a->i[i]; j<a->i[i+1]; j++) { 397 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 398 x_l = a->j[j]*bs; x_r = x_l + 1.0; 399 aa = a->a + j*bs2; 400 for (k=0; k<bs; k++) { 401 for (l=0; l<bs; l++) { 402 if (PetscRealPart(*aa++) >= 0.) continue; 403 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 404 } 405 } 406 } 407 } 408 color = PETSC_DRAW_CYAN; 409 for (i=0,row=0; i<mbs; i++,row+=bs) { 410 for (j=a->i[i]; j<a->i[i+1]; j++) { 411 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 412 x_l = a->j[j]*bs; x_r = x_l + 1.0; 413 aa = a->a + j*bs2; 414 for (k=0; k<bs; k++) { 415 for (l=0; l<bs; l++) { 416 if (PetscRealPart(*aa++) != 0.) continue; 417 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 418 } 419 } 420 } 421 } 422 423 color = PETSC_DRAW_RED; 424 for (i=0,row=0; i<mbs; i++,row+=bs) { 425 for (j=a->i[i]; j<a->i[i+1]; j++) { 426 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 427 x_l = a->j[j]*bs; x_r = x_l + 1.0; 428 aa = a->a + j*bs2; 429 for (k=0; k<bs; k++) { 430 for (l=0; l<bs; l++) { 431 if (PetscRealPart(*aa++) <= 0.) continue; 432 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 433 } 434 } 435 } 436 } 437 PetscFunctionReturn(0); 438 } 439 440 #undef __FUNCT__ 441 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 442 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 443 { 444 PetscErrorCode ierr; 445 PetscReal xl,yl,xr,yr,w,h; 446 PetscDraw draw; 447 PetscTruth isnull; 448 449 PetscFunctionBegin; 450 451 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 452 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 453 454 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 455 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 456 xr += w; yr += h; xl = -w; yl = -h; 457 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 458 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 459 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 460 PetscFunctionReturn(0); 461 } 462 463 #undef __FUNCT__ 464 #define __FUNCT__ "MatView_SeqSBAIJ" 465 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 466 { 467 PetscErrorCode ierr; 468 PetscTruth iascii,isdraw; 469 470 PetscFunctionBegin; 471 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 472 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 473 if (iascii){ 474 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 475 } else if (isdraw) { 476 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 477 } else { 478 Mat B; 479 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 480 ierr = MatView(B,viewer);CHKERRQ(ierr); 481 ierr = MatDestroy(B);CHKERRQ(ierr); 482 } 483 PetscFunctionReturn(0); 484 } 485 486 487 #undef __FUNCT__ 488 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 489 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 490 { 491 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 492 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 493 PetscInt *ai = a->i,*ailen = a->ilen; 494 PetscInt brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2; 495 MatScalar *ap,*aa = a->a,zero = 0.0; 496 497 PetscFunctionBegin; 498 for (k=0; k<m; k++) { /* loop over rows */ 499 row = im[k]; brow = row/bs; 500 if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row); 501 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 502 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 503 nrow = ailen[brow]; 504 for (l=0; l<n; l++) { /* loop over columns */ 505 if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]); 506 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1); 507 col = in[l] ; 508 bcol = col/bs; 509 cidx = col%bs; 510 ridx = row%bs; 511 high = nrow; 512 low = 0; /* assume unsorted */ 513 while (high-low > 5) { 514 t = (low+high)/2; 515 if (rp[t] > bcol) high = t; 516 else low = t; 517 } 518 for (i=low; i<high; i++) { 519 if (rp[i] > bcol) break; 520 if (rp[i] == bcol) { 521 *v++ = ap[bs2*i+bs*cidx+ridx]; 522 goto finished; 523 } 524 } 525 *v++ = zero; 526 finished:; 527 } 528 } 529 PetscFunctionReturn(0); 530 } 531 532 533 #undef __FUNCT__ 534 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 535 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 536 { 537 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 538 PetscErrorCode ierr; 539 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 540 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 541 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval; 542 PetscTruth roworiented=a->roworiented; 543 const MatScalar *value = v; 544 MatScalar *ap,*aa = a->a,*bap; 545 546 PetscFunctionBegin; 547 if (roworiented) { 548 stepval = (n-1)*bs; 549 } else { 550 stepval = (m-1)*bs; 551 } 552 for (k=0; k<m; k++) { /* loop over added rows */ 553 row = im[k]; 554 if (row < 0) continue; 555 #if defined(PETSC_USE_DEBUG) 556 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 557 #endif 558 rp = aj + ai[row]; 559 ap = aa + bs2*ai[row]; 560 rmax = imax[row]; 561 nrow = ailen[row]; 562 low = 0; 563 high = nrow; 564 for (l=0; l<n; l++) { /* loop over added columns */ 565 if (in[l] < 0) continue; 566 col = in[l]; 567 #if defined(PETSC_USE_DEBUG) 568 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1); 569 #endif 570 if (col < row) continue; /* ignore lower triangular block */ 571 if (roworiented) { 572 value = v + k*(stepval+bs)*bs + l*bs; 573 } else { 574 value = v + l*(stepval+bs)*bs + k*bs; 575 } 576 if (col < lastcol) low = 0; else high = nrow; 577 lastcol = col; 578 while (high-low > 7) { 579 t = (low+high)/2; 580 if (rp[t] > col) high = t; 581 else low = t; 582 } 583 for (i=low; i<high; i++) { 584 if (rp[i] > col) break; 585 if (rp[i] == col) { 586 bap = ap + bs2*i; 587 if (roworiented) { 588 if (is == ADD_VALUES) { 589 for (ii=0; ii<bs; ii++,value+=stepval) { 590 for (jj=ii; jj<bs2; jj+=bs) { 591 bap[jj] += *value++; 592 } 593 } 594 } else { 595 for (ii=0; ii<bs; ii++,value+=stepval) { 596 for (jj=ii; jj<bs2; jj+=bs) { 597 bap[jj] = *value++; 598 } 599 } 600 } 601 } else { 602 if (is == ADD_VALUES) { 603 for (ii=0; ii<bs; ii++,value+=stepval) { 604 for (jj=0; jj<bs; jj++) { 605 *bap++ += *value++; 606 } 607 } 608 } else { 609 for (ii=0; ii<bs; ii++,value+=stepval) { 610 for (jj=0; jj<bs; jj++) { 611 *bap++ = *value++; 612 } 613 } 614 } 615 } 616 goto noinsert2; 617 } 618 } 619 if (nonew == 1) goto noinsert2; 620 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 621 MatSeqXAIJReallocateAIJ(a,bs2,nrow,row,col,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew); 622 N = nrow++ - 1; 623 /* shift up all the later entries in this row */ 624 for (ii=N; ii>=i; ii--) { 625 rp[ii+1] = rp[ii]; 626 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 627 } 628 if (N >= i) { 629 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 630 } 631 rp[i] = col; 632 bap = ap + bs2*i; 633 if (roworiented) { 634 for (ii=0; ii<bs; ii++,value+=stepval) { 635 for (jj=ii; jj<bs2; jj+=bs) { 636 bap[jj] = *value++; 637 } 638 } 639 } else { 640 for (ii=0; ii<bs; ii++,value+=stepval) { 641 for (jj=0; jj<bs; jj++) { 642 *bap++ = *value++; 643 } 644 } 645 } 646 noinsert2:; 647 low = i; 648 } 649 ailen[row] = nrow; 650 } 651 PetscFunctionReturn(0); 652 } 653 654 #undef __FUNCT__ 655 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 656 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 657 { 658 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 659 PetscErrorCode ierr; 660 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 661 PetscInt m = A->m,*ip,N,*ailen = a->ilen; 662 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 663 MatScalar *aa = a->a,*ap; 664 665 PetscFunctionBegin; 666 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 667 668 if (m) rmax = ailen[0]; 669 for (i=1; i<mbs; i++) { 670 /* move each row back by the amount of empty slots (fshift) before it*/ 671 fshift += imax[i-1] - ailen[i-1]; 672 rmax = PetscMax(rmax,ailen[i]); 673 if (fshift) { 674 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 675 N = ailen[i]; 676 for (j=0; j<N; j++) { 677 ip[j-fshift] = ip[j]; 678 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 679 } 680 } 681 ai[i] = ai[i-1] + ailen[i-1]; 682 } 683 if (mbs) { 684 fshift += imax[mbs-1] - ailen[mbs-1]; 685 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 686 } 687 /* reset ilen and imax for each row */ 688 for (i=0; i<mbs; i++) { 689 ailen[i] = imax[i] = ai[i+1] - ai[i]; 690 } 691 a->nz = ai[mbs]; 692 693 /* diagonals may have moved, reset it */ 694 if (a->diag) { 695 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 696 } 697 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n", 698 m,A->m,A->bs,fshift*bs2,a->nz*bs2));CHKERRQ(ierr); 699 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs));CHKERRQ(ierr); 700 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %D\n",rmax));CHKERRQ(ierr); 701 a->reallocs = 0; 702 A->info.nz_unneeded = (PetscReal)fshift*bs2; 703 PetscFunctionReturn(0); 704 } 705 706 /* 707 This function returns an array of flags which indicate the locations of contiguous 708 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 709 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 710 Assume: sizes should be long enough to hold all the values. 711 */ 712 #undef __FUNCT__ 713 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 714 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 715 { 716 PetscInt i,j,k,row; 717 PetscTruth flg; 718 719 PetscFunctionBegin; 720 for (i=0,j=0; i<n; j++) { 721 row = idx[i]; 722 if (row%bs!=0) { /* Not the begining of a block */ 723 sizes[j] = 1; 724 i++; 725 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 726 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 727 i++; 728 } else { /* Begining of the block, so check if the complete block exists */ 729 flg = PETSC_TRUE; 730 for (k=1; k<bs; k++) { 731 if (row+k != idx[i+k]) { /* break in the block */ 732 flg = PETSC_FALSE; 733 break; 734 } 735 } 736 if (flg) { /* No break in the bs */ 737 sizes[j] = bs; 738 i+= bs; 739 } else { 740 sizes[j] = 1; 741 i++; 742 } 743 } 744 } 745 *bs_max = j; 746 PetscFunctionReturn(0); 747 } 748 749 750 /* Only add/insert a(i,j) with i<=j (blocks). 751 Any a(i,j) with i>j input by user is ingored. 752 */ 753 754 #undef __FUNCT__ 755 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 756 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 757 { 758 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 759 PetscErrorCode ierr; 760 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 761 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 762 PetscInt *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol; 763 PetscInt ridx,cidx,bs2=a->bs2; 764 MatScalar *ap,value,*aa=a->a,*bap; 765 766 PetscFunctionBegin; 767 for (k=0; k<m; k++) { /* loop over added rows */ 768 row = im[k]; /* row number */ 769 brow = row/bs; /* block row number */ 770 if (row < 0) continue; 771 #if defined(PETSC_USE_DEBUG) 772 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 773 #endif 774 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 775 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 776 rmax = imax[brow]; /* maximum space allocated for this row */ 777 nrow = ailen[brow]; /* actual length of this row */ 778 low = 0; 779 780 for (l=0; l<n; l++) { /* loop over added columns */ 781 if (in[l] < 0) continue; 782 #if defined(PETSC_USE_DEBUG) 783 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->m-1); 784 #endif 785 col = in[l]; 786 bcol = col/bs; /* block col number */ 787 788 if (brow > bcol) { 789 if (a->ignore_ltriangular){ 790 continue; /* ignore lower triangular values */ 791 } else { 792 SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR)"); 793 } 794 } 795 796 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 797 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 798 /* element value a(k,l) */ 799 if (roworiented) { 800 value = v[l + k*n]; 801 } else { 802 value = v[k + l*m]; 803 } 804 805 /* move pointer bap to a(k,l) quickly and add/insert value */ 806 if (col < lastcol) low = 0; high = nrow; 807 lastcol = col; 808 while (high-low > 7) { 809 t = (low+high)/2; 810 if (rp[t] > bcol) high = t; 811 else low = t; 812 } 813 for (i=low; i<high; i++) { 814 if (rp[i] > bcol) break; 815 if (rp[i] == bcol) { 816 bap = ap + bs2*i + bs*cidx + ridx; 817 if (is == ADD_VALUES) *bap += value; 818 else *bap = value; 819 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 820 if (brow == bcol && ridx < cidx){ 821 bap = ap + bs2*i + bs*ridx + cidx; 822 if (is == ADD_VALUES) *bap += value; 823 else *bap = value; 824 } 825 goto noinsert1; 826 } 827 } 828 829 if (nonew == 1) goto noinsert1; 830 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 831 MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew); 832 833 N = nrow++ - 1; 834 /* shift up all the later entries in this row */ 835 for (ii=N; ii>=i; ii--) { 836 rp[ii+1] = rp[ii]; 837 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 838 } 839 if (N>=i) { 840 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 841 } 842 rp[i] = bcol; 843 ap[bs2*i + bs*cidx + ridx] = value; 844 noinsert1:; 845 low = i; 846 } 847 } /* end of loop over added columns */ 848 ailen[brow] = nrow; 849 } /* end of loop over added rows */ 850 PetscFunctionReturn(0); 851 } 852 853 EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*); 854 EXTERN PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*); 855 EXTERN PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat,PetscInt,IS[],PetscInt); 856 EXTERN PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,PetscInt,MatReuse,Mat*); 857 EXTERN PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat*[]); 858 EXTERN PetscErrorCode MatScale_SeqSBAIJ(const PetscScalar*,Mat); 859 EXTERN PetscErrorCode MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 860 EXTERN PetscErrorCode MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 861 EXTERN PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat,Vec); 862 EXTERN PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 863 EXTERN PetscErrorCode MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 864 EXTERN PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat); 865 EXTERN PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat,Vec); 866 867 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 868 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 869 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 870 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 871 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 872 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 873 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 874 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 875 876 EXTERN PetscErrorCode MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs); 877 878 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 879 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 880 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 881 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 882 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 883 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 884 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 885 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 886 887 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,MatFactorInfo*,Mat*); 888 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,MatFactorInfo*,Mat*); 889 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,MatFactorInfo*,Mat*); 890 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,MatFactorInfo*,Mat*); 891 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,MatFactorInfo*,Mat*); 892 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,MatFactorInfo*,Mat*); 893 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,MatFactorInfo*,Mat*); 894 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,MatFactorInfo*,Mat*); 895 EXTERN PetscErrorCode MatGetInertia_SeqSBAIJ(Mat,PetscInt*,PetscInt*,PetscInt*); 896 897 EXTERN PetscErrorCode MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 898 EXTERN PetscErrorCode MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 899 EXTERN PetscErrorCode MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 900 EXTERN PetscErrorCode MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 901 EXTERN PetscErrorCode MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 902 EXTERN PetscErrorCode MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 903 EXTERN PetscErrorCode MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 904 EXTERN PetscErrorCode MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 905 906 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 907 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 908 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 909 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 910 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 911 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 912 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 913 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 914 915 #ifdef HAVE_MatICCFactor 916 /* modified from MatILUFactor_SeqSBAIJ, needs further work! */ 917 #undef __FUNCT__ 918 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 919 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 920 { 921 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 922 Mat outA; 923 PetscErrorCode ierr; 924 PetscTruth row_identity,col_identity; 925 926 PetscFunctionBegin; 927 outA = inA; 928 inA->factor = FACTOR_CHOLESKY; 929 930 if (!a->diag) { 931 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 932 } 933 /* 934 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 935 for ILU(0) factorization with natural ordering 936 */ 937 switch (a->bs) { 938 case 1: 939 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 940 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 941 inA->ops->solves = MatSolves_SeqSBAIJ_1; 942 ierr = PetscLoginfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"));CHKERRQ(ierr); 943 case 2: 944 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 945 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 946 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 947 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"));CHKERRQ(ierr); 948 break; 949 case 3: 950 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 951 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 952 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 953 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"));CHKERRQ(ierr); 954 break; 955 case 4: 956 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 957 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 958 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 959 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"));CHKERRQ(ierr); 960 break; 961 case 5: 962 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 963 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 964 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 965 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"));CHKERRQ(ierr); 966 break; 967 case 6: 968 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 969 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 970 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 971 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"));CHKERRQ(ierr); 972 break; 973 case 7: 974 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 975 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 976 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 977 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"));CHKERRQ(ierr); 978 break; 979 default: 980 a->row = row; 981 a->icol = col; 982 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 983 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 984 985 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 986 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 987 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 988 989 if (!a->solve_work) { 990 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 991 ierr = PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 992 } 993 } 994 995 ierr = MatCholeskyFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 996 PetscFunctionReturn(0); 997 } 998 #endif 999 1000 #undef __FUNCT__ 1001 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1002 PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat A) 1003 { 1004 static PetscTruth called = PETSC_FALSE; 1005 MPI_Comm comm = A->comm; 1006 PetscErrorCode ierr; 1007 1008 PetscFunctionBegin; 1009 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1010 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1011 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1012 ierr = (*PetscHelpPrintf)(comm," -mat_ignore_ltriangular: Ignore lower triangular values set by user\n");CHKERRQ(ierr); 1013 PetscFunctionReturn(0); 1014 } 1015 1016 EXTERN_C_BEGIN 1017 #undef __FUNCT__ 1018 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1019 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices) 1020 { 1021 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1022 PetscInt i,nz,n; 1023 1024 PetscFunctionBegin; 1025 nz = baij->maxnz; 1026 n = mat->n; 1027 for (i=0; i<nz; i++) { 1028 baij->j[i] = indices[i]; 1029 } 1030 baij->nz = nz; 1031 for (i=0; i<n; i++) { 1032 baij->ilen[i] = baij->imax[i]; 1033 } 1034 PetscFunctionReturn(0); 1035 } 1036 EXTERN_C_END 1037 1038 #undef __FUNCT__ 1039 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1040 /*@ 1041 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1042 in the matrix. 1043 1044 Input Parameters: 1045 + mat - the SeqSBAIJ matrix 1046 - indices - the column indices 1047 1048 Level: advanced 1049 1050 Notes: 1051 This can be called if you have precomputed the nonzero structure of the 1052 matrix and want to provide it to the matrix object to improve the performance 1053 of the MatSetValues() operation. 1054 1055 You MUST have set the correct numbers of nonzeros per row in the call to 1056 MatCreateSeqSBAIJ(), and the columns indices MUST be sorted. 1057 1058 MUST be called before any calls to MatSetValues() 1059 1060 .seealso: MatCreateSeqSBAIJ 1061 @*/ 1062 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1063 { 1064 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1065 1066 PetscFunctionBegin; 1067 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1068 PetscValidPointer(indices,2); 1069 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1070 if (f) { 1071 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1072 } else { 1073 SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices"); 1074 } 1075 PetscFunctionReturn(0); 1076 } 1077 1078 #undef __FUNCT__ 1079 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1080 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A) 1081 { 1082 PetscErrorCode ierr; 1083 1084 PetscFunctionBegin; 1085 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1086 PetscFunctionReturn(0); 1087 } 1088 1089 #undef __FUNCT__ 1090 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1091 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1092 { 1093 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1094 PetscFunctionBegin; 1095 *array = a->a; 1096 PetscFunctionReturn(0); 1097 } 1098 1099 #undef __FUNCT__ 1100 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1101 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1102 { 1103 PetscFunctionBegin; 1104 PetscFunctionReturn(0); 1105 } 1106 1107 #include "petscblaslapack.h" 1108 #undef __FUNCT__ 1109 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1110 PetscErrorCode MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1111 { 1112 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1113 PetscErrorCode ierr; 1114 PetscInt i,bs=Y->bs,bs2,j; 1115 PetscBLASInt bnz = (PetscBLASInt)x->nz,one = 1; 1116 1117 PetscFunctionBegin; 1118 if (str == SAME_NONZERO_PATTERN) { 1119 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1120 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1121 if (y->xtoy && y->XtoY != X) { 1122 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1123 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1124 } 1125 if (!y->xtoy) { /* get xtoy */ 1126 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1127 y->XtoY = X; 1128 } 1129 bs2 = bs*bs; 1130 for (i=0; i<x->nz; i++) { 1131 j = 0; 1132 while (j < bs2){ 1133 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1134 j++; 1135 } 1136 } 1137 ierr = PetscLogInfo((0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr); 1138 } else { 1139 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1140 } 1141 PetscFunctionReturn(0); 1142 } 1143 1144 #undef __FUNCT__ 1145 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1146 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1147 { 1148 PetscFunctionBegin; 1149 *flg = PETSC_TRUE; 1150 PetscFunctionReturn(0); 1151 } 1152 1153 #undef __FUNCT__ 1154 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1155 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1156 { 1157 PetscFunctionBegin; 1158 *flg = PETSC_TRUE; 1159 PetscFunctionReturn(0); 1160 } 1161 1162 #undef __FUNCT__ 1163 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1164 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg) 1165 { 1166 PetscFunctionBegin; 1167 *flg = PETSC_FALSE; 1168 PetscFunctionReturn(0); 1169 } 1170 1171 /* -------------------------------------------------------------------*/ 1172 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1173 MatGetRow_SeqSBAIJ, 1174 MatRestoreRow_SeqSBAIJ, 1175 MatMult_SeqSBAIJ_N, 1176 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1177 MatMult_SeqSBAIJ_N, 1178 MatMultAdd_SeqSBAIJ_N, 1179 MatSolve_SeqSBAIJ_N, 1180 0, 1181 0, 1182 /*10*/ 0, 1183 0, 1184 MatCholeskyFactor_SeqSBAIJ, 1185 MatRelax_SeqSBAIJ, 1186 MatTranspose_SeqSBAIJ, 1187 /*15*/ MatGetInfo_SeqSBAIJ, 1188 MatEqual_SeqSBAIJ, 1189 MatGetDiagonal_SeqSBAIJ, 1190 MatDiagonalScale_SeqSBAIJ, 1191 MatNorm_SeqSBAIJ, 1192 /*20*/ 0, 1193 MatAssemblyEnd_SeqSBAIJ, 1194 0, 1195 MatSetOption_SeqSBAIJ, 1196 MatZeroEntries_SeqSBAIJ, 1197 /*25*/ 0, 1198 0, 1199 0, 1200 MatCholeskyFactorSymbolic_SeqSBAIJ, 1201 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1202 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1203 0, 1204 MatICCFactorSymbolic_SeqSBAIJ, 1205 MatGetArray_SeqSBAIJ, 1206 MatRestoreArray_SeqSBAIJ, 1207 /*35*/ MatDuplicate_SeqSBAIJ, 1208 0, 1209 0, 1210 0, 1211 0, 1212 /*40*/ MatAXPY_SeqSBAIJ, 1213 MatGetSubMatrices_SeqSBAIJ, 1214 MatIncreaseOverlap_SeqSBAIJ, 1215 MatGetValues_SeqSBAIJ, 1216 0, 1217 /*45*/ MatPrintHelp_SeqSBAIJ, 1218 MatScale_SeqSBAIJ, 1219 0, 1220 0, 1221 0, 1222 /*50*/ 0, 1223 MatGetRowIJ_SeqSBAIJ, 1224 MatRestoreRowIJ_SeqSBAIJ, 1225 0, 1226 0, 1227 /*55*/ 0, 1228 0, 1229 0, 1230 0, 1231 MatSetValuesBlocked_SeqSBAIJ, 1232 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1233 0, 1234 0, 1235 MatGetPetscMaps_Petsc, 1236 0, 1237 /*65*/ 0, 1238 0, 1239 0, 1240 0, 1241 0, 1242 /*70*/ MatGetRowMax_SeqSBAIJ, 1243 0, 1244 0, 1245 0, 1246 0, 1247 /*75*/ 0, 1248 0, 1249 0, 1250 0, 1251 0, 1252 /*80*/ 0, 1253 0, 1254 0, 1255 #if !defined(PETSC_USE_COMPLEX) 1256 MatGetInertia_SeqSBAIJ, 1257 #else 1258 0, 1259 #endif 1260 MatLoad_SeqSBAIJ, 1261 /*85*/ MatIsSymmetric_SeqSBAIJ, 1262 MatIsHermitian_SeqSBAIJ, 1263 MatIsStructurallySymmetric_SeqSBAIJ, 1264 0, 1265 0, 1266 /*90*/ 0, 1267 0, 1268 0, 1269 0, 1270 0, 1271 /*95*/ 0, 1272 0, 1273 0, 1274 0}; 1275 1276 EXTERN_C_BEGIN 1277 #undef __FUNCT__ 1278 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1279 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqSBAIJ(Mat mat) 1280 { 1281 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1282 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 1283 PetscErrorCode ierr; 1284 1285 PetscFunctionBegin; 1286 if (aij->nonew != 1) { 1287 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1288 } 1289 1290 /* allocate space for values if not already there */ 1291 if (!aij->saved_values) { 1292 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1293 } 1294 1295 /* copy values over */ 1296 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1297 PetscFunctionReturn(0); 1298 } 1299 EXTERN_C_END 1300 1301 EXTERN_C_BEGIN 1302 #undef __FUNCT__ 1303 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1304 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqSBAIJ(Mat mat) 1305 { 1306 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1307 PetscErrorCode ierr; 1308 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 1309 1310 PetscFunctionBegin; 1311 if (aij->nonew != 1) { 1312 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1313 } 1314 if (!aij->saved_values) { 1315 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1316 } 1317 1318 /* copy values over */ 1319 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1320 PetscFunctionReturn(0); 1321 } 1322 EXTERN_C_END 1323 1324 EXTERN_C_BEGIN 1325 #undef __FUNCT__ 1326 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1327 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 1328 { 1329 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1330 PetscErrorCode ierr; 1331 PetscInt i,mbs,bs2; 1332 PetscTruth skipallocation = PETSC_FALSE,flg; 1333 1334 PetscFunctionBegin; 1335 B->preallocated = PETSC_TRUE; 1336 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1337 mbs = B->m/bs; 1338 bs2 = bs*bs; 1339 1340 if (mbs*bs != B->m) { 1341 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1342 } 1343 1344 if (nz == MAT_SKIP_ALLOCATION) { 1345 skipallocation = PETSC_TRUE; 1346 nz = 0; 1347 } 1348 1349 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1350 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 1351 if (nnz) { 1352 for (i=0; i<mbs; i++) { 1353 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 1354 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs); 1355 } 1356 } 1357 1358 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1359 if (!flg) { 1360 switch (bs) { 1361 case 1: 1362 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1363 B->ops->solve = MatSolve_SeqSBAIJ_1; 1364 B->ops->solves = MatSolves_SeqSBAIJ_1; 1365 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1366 B->ops->mult = MatMult_SeqSBAIJ_1; 1367 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1368 break; 1369 case 2: 1370 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1371 B->ops->solve = MatSolve_SeqSBAIJ_2; 1372 B->ops->solvetranspose = MatSolve_SeqSBAIJ_2; 1373 B->ops->mult = MatMult_SeqSBAIJ_2; 1374 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1375 break; 1376 case 3: 1377 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1378 B->ops->solve = MatSolve_SeqSBAIJ_3; 1379 B->ops->solvetranspose = MatSolve_SeqSBAIJ_3; 1380 B->ops->mult = MatMult_SeqSBAIJ_3; 1381 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1382 break; 1383 case 4: 1384 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1385 B->ops->solve = MatSolve_SeqSBAIJ_4; 1386 B->ops->solvetranspose = MatSolve_SeqSBAIJ_4; 1387 B->ops->mult = MatMult_SeqSBAIJ_4; 1388 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1389 break; 1390 case 5: 1391 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1392 B->ops->solve = MatSolve_SeqSBAIJ_5; 1393 B->ops->solvetranspose = MatSolve_SeqSBAIJ_5; 1394 B->ops->mult = MatMult_SeqSBAIJ_5; 1395 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1396 break; 1397 case 6: 1398 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1399 B->ops->solve = MatSolve_SeqSBAIJ_6; 1400 B->ops->solvetranspose = MatSolve_SeqSBAIJ_6; 1401 B->ops->mult = MatMult_SeqSBAIJ_6; 1402 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1403 break; 1404 case 7: 1405 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1406 B->ops->solve = MatSolve_SeqSBAIJ_7; 1407 B->ops->solvetranspose = MatSolve_SeqSBAIJ_7; 1408 B->ops->mult = MatMult_SeqSBAIJ_7; 1409 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1410 break; 1411 } 1412 } 1413 1414 b->mbs = mbs; 1415 b->nbs = mbs; 1416 if (!skipallocation) { 1417 /* b->ilen will count nonzeros in each block row so far. */ 1418 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 1419 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1420 if (!nnz) { 1421 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1422 else if (nz <= 0) nz = 1; 1423 for (i=0; i<mbs; i++) { 1424 b->imax[i] = nz; 1425 } 1426 nz = nz*mbs; /* total nz */ 1427 } else { 1428 nz = 0; 1429 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1430 } 1431 /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1432 1433 /* allocate the matrix space */ 1434 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->m+1,PetscInt,&b->i);CHKERRQ(ierr); 1435 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1436 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1437 b->singlemalloc = PETSC_TRUE; 1438 1439 /* pointer to beginning of each row */ 1440 b->i[0] = 0; 1441 for (i=1; i<mbs+1; i++) { 1442 b->i[i] = b->i[i-1] + b->imax[i-1]; 1443 } 1444 } 1445 1446 B->bs = bs; 1447 b->bs2 = bs2; 1448 b->nz = 0; 1449 b->maxnz = nz*bs2; 1450 1451 b->inew = 0; 1452 b->jnew = 0; 1453 b->anew = 0; 1454 b->a2anew = 0; 1455 b->permute = PETSC_FALSE; 1456 PetscFunctionReturn(0); 1457 } 1458 EXTERN_C_END 1459 1460 EXTERN_C_BEGIN 1461 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 1462 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqBAIJ(Mat, MatType,MatReuse,Mat*); 1463 EXTERN_C_END 1464 1465 /*MC 1466 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1467 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1468 1469 Options Database Keys: 1470 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1471 1472 Level: beginner 1473 1474 .seealso: MatCreateSeqSBAIJ 1475 M*/ 1476 1477 EXTERN_C_BEGIN 1478 #undef __FUNCT__ 1479 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1480 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqSBAIJ(Mat B) 1481 { 1482 Mat_SeqSBAIJ *b; 1483 PetscErrorCode ierr; 1484 PetscMPIInt size; 1485 PetscTruth flg; 1486 1487 PetscFunctionBegin; 1488 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1489 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1490 B->m = B->M = PetscMax(B->m,B->M); 1491 B->n = B->N = PetscMax(B->n,B->N); 1492 1493 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1494 B->data = (void*)b; 1495 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1496 B->ops->destroy = MatDestroy_SeqSBAIJ; 1497 B->ops->view = MatView_SeqSBAIJ; 1498 B->factor = 0; 1499 B->mapping = 0; 1500 b->row = 0; 1501 b->icol = 0; 1502 b->reallocs = 0; 1503 b->saved_values = 0; 1504 1505 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1506 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1507 1508 b->sorted = PETSC_FALSE; 1509 b->roworiented = PETSC_TRUE; 1510 b->nonew = 0; 1511 b->diag = 0; 1512 b->solve_work = 0; 1513 b->mult_work = 0; 1514 B->spptr = 0; 1515 b->keepzeroedrows = PETSC_FALSE; 1516 b->xtoy = 0; 1517 b->XtoY = 0; 1518 1519 b->inew = 0; 1520 b->jnew = 0; 1521 b->anew = 0; 1522 b->a2anew = 0; 1523 b->permute = PETSC_FALSE; 1524 1525 b->ignore_ltriangular = PETSC_FALSE; 1526 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_ignore_lower_triangular",&flg);CHKERRQ(ierr); 1527 if (flg) b->ignore_ltriangular = PETSC_TRUE; 1528 1529 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1530 "MatStoreValues_SeqSBAIJ", 1531 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1532 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1533 "MatRetrieveValues_SeqSBAIJ", 1534 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1535 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1536 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1537 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1538 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C", 1539 "MatConvert_SeqSBAIJ_SeqAIJ", 1540 MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr); 1541 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C", 1542 "MatConvert_SeqSBAIJ_SeqBAIJ", 1543 MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr); 1544 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1545 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1546 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1547 1548 B->symmetric = PETSC_TRUE; 1549 B->structurally_symmetric = PETSC_TRUE; 1550 B->symmetric_set = PETSC_TRUE; 1551 B->structurally_symmetric_set = PETSC_TRUE; 1552 PetscFunctionReturn(0); 1553 } 1554 EXTERN_C_END 1555 1556 #undef __FUNCT__ 1557 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1558 /*@C 1559 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1560 compressed row) format. For good matrix assembly performance the 1561 user should preallocate the matrix storage by setting the parameter nz 1562 (or the array nnz). By setting these parameters accurately, performance 1563 during matrix assembly can be increased by more than a factor of 50. 1564 1565 Collective on Mat 1566 1567 Input Parameters: 1568 + A - the symmetric matrix 1569 . bs - size of block 1570 . nz - number of block nonzeros per block row (same for all rows) 1571 - nnz - array containing the number of block nonzeros in the upper triangular plus 1572 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1573 1574 Options Database Keys: 1575 . -mat_no_unroll - uses code that does not unroll the loops in the 1576 block calculations (much slower) 1577 . -mat_block_size - size of the blocks to use 1578 1579 Level: intermediate 1580 1581 Notes: 1582 Specify the preallocated storage with either nz or nnz (not both). 1583 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1584 allocation. For additional details, see the users manual chapter on 1585 matrices. 1586 1587 If the nnz parameter is given then the nz parameter is ignored 1588 1589 1590 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1591 @*/ 1592 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 1593 { 1594 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 1595 1596 PetscFunctionBegin; 1597 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1598 if (f) { 1599 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1600 } 1601 PetscFunctionReturn(0); 1602 } 1603 1604 #undef __FUNCT__ 1605 #define __FUNCT__ "MatCreateSeqSBAIJ" 1606 /*@C 1607 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1608 compressed row) format. For good matrix assembly performance the 1609 user should preallocate the matrix storage by setting the parameter nz 1610 (or the array nnz). By setting these parameters accurately, performance 1611 during matrix assembly can be increased by more than a factor of 50. 1612 1613 Collective on MPI_Comm 1614 1615 Input Parameters: 1616 + comm - MPI communicator, set to PETSC_COMM_SELF 1617 . bs - size of block 1618 . m - number of rows, or number of columns 1619 . nz - number of block nonzeros per block row (same for all rows) 1620 - nnz - array containing the number of block nonzeros in the upper triangular plus 1621 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1622 1623 Output Parameter: 1624 . A - the symmetric matrix 1625 1626 Options Database Keys: 1627 . -mat_no_unroll - uses code that does not unroll the loops in the 1628 block calculations (much slower) 1629 . -mat_block_size - size of the blocks to use 1630 1631 Level: intermediate 1632 1633 Notes: 1634 1635 Specify the preallocated storage with either nz or nnz (not both). 1636 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1637 allocation. For additional details, see the users manual chapter on 1638 matrices. 1639 1640 If the nnz parameter is given then the nz parameter is ignored 1641 1642 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1643 @*/ 1644 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1645 { 1646 PetscErrorCode ierr; 1647 1648 PetscFunctionBegin; 1649 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1650 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 1651 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1652 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 1653 PetscFunctionReturn(0); 1654 } 1655 1656 #undef __FUNCT__ 1657 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1658 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1659 { 1660 Mat C; 1661 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1662 PetscErrorCode ierr; 1663 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1664 1665 PetscFunctionBegin; 1666 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1667 1668 *B = 0; 1669 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 1670 ierr = MatSetSizes(C,A->m,A->n,A->m,A->n);CHKERRQ(ierr); 1671 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1672 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1673 c = (Mat_SeqSBAIJ*)C->data; 1674 1675 C->preallocated = PETSC_TRUE; 1676 C->factor = A->factor; 1677 c->row = 0; 1678 c->icol = 0; 1679 c->saved_values = 0; 1680 c->keepzeroedrows = a->keepzeroedrows; 1681 C->assembled = PETSC_TRUE; 1682 1683 C->M = A->M; 1684 C->N = A->N; 1685 C->bs = A->bs; 1686 c->bs2 = a->bs2; 1687 c->mbs = a->mbs; 1688 c->nbs = a->nbs; 1689 1690 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr); 1691 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr); 1692 for (i=0; i<mbs; i++) { 1693 c->imax[i] = a->imax[i]; 1694 c->ilen[i] = a->ilen[i]; 1695 } 1696 ierr = PetscLogObjectMemory(C,2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1697 1698 /* allocate the matrix space */ 1699 ierr = PetscMalloc3(bs2*nz,MatScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 1700 c->singlemalloc = PETSC_TRUE; 1701 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1702 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)));CHKERRQ(ierr); 1703 if (mbs > 0) { 1704 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1705 if (cpvalues == MAT_COPY_VALUES) { 1706 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1707 } else { 1708 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1709 } 1710 } 1711 1712 c->sorted = a->sorted; 1713 c->roworiented = a->roworiented; 1714 c->nonew = a->nonew; 1715 1716 if (a->diag) { 1717 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 1718 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1719 for (i=0; i<mbs; i++) { 1720 c->diag[i] = a->diag[i]; 1721 } 1722 } else c->diag = 0; 1723 c->nz = a->nz; 1724 c->maxnz = a->maxnz; 1725 c->solve_work = 0; 1726 c->mult_work = 0; 1727 *B = C; 1728 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1729 PetscFunctionReturn(0); 1730 } 1731 1732 #undef __FUNCT__ 1733 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1734 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer, MatType type,Mat *A) 1735 { 1736 Mat_SeqSBAIJ *a; 1737 Mat B; 1738 PetscErrorCode ierr; 1739 int fd; 1740 PetscMPIInt size; 1741 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 1742 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1743 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 1744 PetscInt *masked,nmask,tmp,bs2,ishift; 1745 PetscScalar *aa; 1746 MPI_Comm comm = ((PetscObject)viewer)->comm; 1747 1748 PetscFunctionBegin; 1749 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1750 bs2 = bs*bs; 1751 1752 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1753 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1754 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1755 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1756 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1757 M = header[1]; N = header[2]; nz = header[3]; 1758 1759 if (header[3] < 0) { 1760 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1761 } 1762 1763 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1764 1765 /* 1766 This code adds extra rows to make sure the number of rows is 1767 divisible by the blocksize 1768 */ 1769 mbs = M/bs; 1770 extra_rows = bs - M + bs*(mbs); 1771 if (extra_rows == bs) extra_rows = 0; 1772 else mbs++; 1773 if (extra_rows) { 1774 ierr = PetscLogInfo((0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr); 1775 } 1776 1777 /* read in row lengths */ 1778 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1779 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1780 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1781 1782 /* read in column indices */ 1783 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1784 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1785 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1786 1787 /* loop over row lengths determining block row lengths */ 1788 ierr = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr); 1789 ierr = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1790 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 1791 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1792 masked = mask + mbs; 1793 rowcount = 0; nzcount = 0; 1794 for (i=0; i<mbs; i++) { 1795 nmask = 0; 1796 for (j=0; j<bs; j++) { 1797 kmax = rowlengths[rowcount]; 1798 for (k=0; k<kmax; k++) { 1799 tmp = jj[nzcount++]/bs; /* block col. index */ 1800 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1801 } 1802 rowcount++; 1803 } 1804 s_browlengths[i] += nmask; 1805 1806 /* zero out the mask elements we set */ 1807 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1808 } 1809 1810 /* create our matrix */ 1811 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 1812 ierr = MatSetSizes(B,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 1813 ierr = MatSetType(B,type);CHKERRQ(ierr); 1814 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,s_browlengths);CHKERRQ(ierr); 1815 a = (Mat_SeqSBAIJ*)B->data; 1816 1817 /* set matrix "i" values */ 1818 a->i[0] = 0; 1819 for (i=1; i<= mbs; i++) { 1820 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1821 a->ilen[i-1] = s_browlengths[i-1]; 1822 } 1823 a->nz = a->i[mbs]; 1824 1825 /* read in nonzero values */ 1826 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1827 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1828 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1829 1830 /* set "a" and "j" values into matrix */ 1831 nzcount = 0; jcount = 0; 1832 for (i=0; i<mbs; i++) { 1833 nzcountb = nzcount; 1834 nmask = 0; 1835 for (j=0; j<bs; j++) { 1836 kmax = rowlengths[i*bs+j]; 1837 for (k=0; k<kmax; k++) { 1838 tmp = jj[nzcount++]/bs; /* block col. index */ 1839 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1840 } 1841 } 1842 /* sort the masked values */ 1843 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1844 1845 /* set "j" values into matrix */ 1846 maskcount = 1; 1847 for (j=0; j<nmask; j++) { 1848 a->j[jcount++] = masked[j]; 1849 mask[masked[j]] = maskcount++; 1850 } 1851 1852 /* set "a" values into matrix */ 1853 ishift = bs2*a->i[i]; 1854 for (j=0; j<bs; j++) { 1855 kmax = rowlengths[i*bs+j]; 1856 for (k=0; k<kmax; k++) { 1857 tmp = jj[nzcountb]/bs ; /* block col. index */ 1858 if (tmp >= i){ 1859 block = mask[tmp] - 1; 1860 point = jj[nzcountb] - bs*tmp; 1861 idx = ishift + bs2*block + j + bs*point; 1862 a->a[idx] = aa[nzcountb]; 1863 } 1864 nzcountb++; 1865 } 1866 } 1867 /* zero out the mask elements we set */ 1868 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1869 } 1870 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1871 1872 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1873 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1874 ierr = PetscFree(aa);CHKERRQ(ierr); 1875 ierr = PetscFree(jj);CHKERRQ(ierr); 1876 ierr = PetscFree(mask);CHKERRQ(ierr); 1877 1878 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1879 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1880 ierr = MatView_Private(B);CHKERRQ(ierr); 1881 *A = B; 1882 PetscFunctionReturn(0); 1883 } 1884 1885 #undef __FUNCT__ 1886 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1887 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1888 { 1889 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1890 MatScalar *aa=a->a,*v,*v1; 1891 PetscScalar *x,*b,*t,sum,d; 1892 PetscErrorCode ierr; 1893 PetscInt m=a->mbs,bs=A->bs,*ai=a->i,*aj=a->j; 1894 PetscInt nz,nz1,*vj,*vj1,i; 1895 1896 PetscFunctionBegin; 1897 its = its*lits; 1898 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1899 1900 if (bs > 1) 1901 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1902 1903 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1904 if (xx != bb) { 1905 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1906 } else { 1907 b = x; 1908 } 1909 1910 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1911 1912 if (flag & SOR_ZERO_INITIAL_GUESS) { 1913 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1914 for (i=0; i<m; i++) 1915 t[i] = b[i]; 1916 1917 for (i=0; i<m; i++){ 1918 d = *(aa + ai[i]); /* diag[i] */ 1919 v = aa + ai[i] + 1; 1920 vj = aj + ai[i] + 1; 1921 nz = ai[i+1] - ai[i] - 1; 1922 x[i] = omega*t[i]/d; 1923 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 1924 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 1925 } 1926 } 1927 1928 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1929 for (i=0; i<m; i++) 1930 t[i] = b[i]; 1931 1932 for (i=0; i<m-1; i++){ /* update rhs */ 1933 v = aa + ai[i] + 1; 1934 vj = aj + ai[i] + 1; 1935 nz = ai[i+1] - ai[i] - 1; 1936 while (nz--) t[*vj++] -= x[i]*(*v++); 1937 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 1938 } 1939 for (i=m-1; i>=0; i--){ 1940 d = *(aa + ai[i]); 1941 v = aa + ai[i] + 1; 1942 vj = aj + ai[i] + 1; 1943 nz = ai[i+1] - ai[i] - 1; 1944 sum = t[i]; 1945 while (nz--) sum -= x[*vj++]*(*v++); 1946 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 1947 x[i] = (1-omega)*x[i] + omega*sum/d; 1948 } 1949 } 1950 its--; 1951 } 1952 1953 while (its--) { 1954 /* 1955 forward sweep: 1956 for i=0,...,m-1: 1957 sum[i] = (b[i] - U(i,:)x )/d[i]; 1958 x[i] = (1-omega)x[i] + omega*sum[i]; 1959 b = b - x[i]*U^T(i,:); 1960 1961 */ 1962 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1963 for (i=0; i<m; i++) 1964 t[i] = b[i]; 1965 1966 for (i=0; i<m; i++){ 1967 d = *(aa + ai[i]); /* diag[i] */ 1968 v = aa + ai[i] + 1; v1=v; 1969 vj = aj + ai[i] + 1; vj1=vj; 1970 nz = ai[i+1] - ai[i] - 1; nz1=nz; 1971 sum = t[i]; 1972 while (nz1--) sum -= (*v1++)*x[*vj1++]; 1973 x[i] = (1-omega)*x[i] + omega*sum/d; 1974 while (nz--) t[*vj++] -= x[i]*(*v++); 1975 ierr = PetscLogFlops(4*nz-2);CHKERRQ(ierr); 1976 } 1977 } 1978 1979 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1980 /* 1981 backward sweep: 1982 b = b - x[i]*U^T(i,:), i=0,...,n-2 1983 for i=m-1,...,0: 1984 sum[i] = (b[i] - U(i,:)x )/d[i]; 1985 x[i] = (1-omega)x[i] + omega*sum[i]; 1986 */ 1987 for (i=0; i<m; i++) 1988 t[i] = b[i]; 1989 1990 for (i=0; i<m-1; i++){ /* update rhs */ 1991 v = aa + ai[i] + 1; 1992 vj = aj + ai[i] + 1; 1993 nz = ai[i+1] - ai[i] - 1; 1994 while (nz--) t[*vj++] -= x[i]*(*v++); 1995 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 1996 } 1997 for (i=m-1; i>=0; i--){ 1998 d = *(aa + ai[i]); 1999 v = aa + ai[i] + 1; 2000 vj = aj + ai[i] + 1; 2001 nz = ai[i+1] - ai[i] - 1; 2002 sum = t[i]; 2003 while (nz--) sum -= x[*vj++]*(*v++); 2004 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2005 x[i] = (1-omega)*x[i] + omega*sum/d; 2006 } 2007 } 2008 } 2009 2010 ierr = PetscFree(t);CHKERRQ(ierr); 2011 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2012 if (bb != xx) { 2013 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2014 } 2015 PetscFunctionReturn(0); 2016 } 2017 2018 2019 2020 2021 2022 2023