1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the SBAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/inline/spops.h" 9 #include "src/mat/impls/sbaij/seq/sbaij.h" 10 11 #define CHUNKSIZE 10 12 13 /* 14 Checks for missing diagonals 15 */ 16 #undef __FUNCT__ 17 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 18 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A) 19 { 20 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 21 PetscErrorCode ierr; 22 PetscInt *diag,*jj = a->j,i; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 for (i=0; i<a->mbs; i++) { 28 if (jj[diag[i]] != i) SETERRQ1(PETSC_ERR_ARG_CORRUPT,"Matrix is missing diagonal number %D",i); 29 } 30 PetscFunctionReturn(0); 31 } 32 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 35 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A) 36 { 37 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 38 PetscErrorCode ierr; 39 PetscInt i,mbs = a->mbs; 40 41 PetscFunctionBegin; 42 if (a->diag) PetscFunctionReturn(0); 43 44 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 45 ierr = PetscLogObjectMemory(A,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 46 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 47 PetscFunctionReturn(0); 48 } 49 50 #undef __FUNCT__ 51 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 52 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 53 { 54 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 55 PetscInt n = a->mbs,i; 56 57 PetscFunctionBegin; 58 *nn = n; 59 if (!ia) PetscFunctionReturn(0); 60 61 if (oshift == 1) { 62 /* temporarily add 1 to i and j indices */ 63 PetscInt nz = a->i[n]; 64 for (i=0; i<nz; i++) a->j[i]++; 65 for (i=0; i<n+1; i++) a->i[i]++; 66 *ia = a->i; *ja = a->j; 67 } else { 68 *ia = a->i; *ja = a->j; 69 } 70 PetscFunctionReturn(0); 71 } 72 73 #undef __FUNCT__ 74 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 75 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 76 { 77 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 78 PetscInt i,n = a->mbs; 79 80 PetscFunctionBegin; 81 if (!ia) PetscFunctionReturn(0); 82 83 if (oshift == 1) { 84 PetscInt nz = a->i[n]-1; 85 for (i=0; i<nz; i++) a->j[i]--; 86 for (i=0; i<n+1; i++) a->i[i]--; 87 } 88 PetscFunctionReturn(0); 89 } 90 91 #undef __FUNCT__ 92 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 93 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A) 94 { 95 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 96 PetscErrorCode ierr; 97 98 PetscFunctionBegin; 99 PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->m,a->nz); 100 if (!a->singlemalloc) { 101 if (a->a) {ierr = PetscFree(a->a);CHKERRQ(ierr);} 102 if (a->i) {ierr = PetscFree(a->i);CHKERRQ(ierr);} 103 if (a->j) {ierr = PetscFree(a->j);CHKERRQ(ierr);} 104 } else { 105 ierr = PetscFree3(a->a,a->i,a->j);CHKERRQ(ierr); 106 } 107 if (a->row) { 108 ierr = ISDestroy(a->row);CHKERRQ(ierr); 109 } 110 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 111 if (a->imax) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 112 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 113 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 114 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 115 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 116 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 117 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 118 119 if (a->inew){ 120 ierr = PetscFree(a->inew);CHKERRQ(ierr); 121 a->inew = 0; 122 } 123 ierr = PetscFree(a);CHKERRQ(ierr); 124 125 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 126 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 127 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 128 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 129 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr); 130 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 131 PetscFunctionReturn(0); 132 } 133 134 #undef __FUNCT__ 135 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 136 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op) 137 { 138 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 139 PetscErrorCode ierr; 140 141 PetscFunctionBegin; 142 switch (op) { 143 case MAT_ROW_ORIENTED: 144 a->roworiented = PETSC_TRUE; 145 break; 146 case MAT_COLUMN_ORIENTED: 147 a->roworiented = PETSC_FALSE; 148 break; 149 case MAT_COLUMNS_SORTED: 150 a->sorted = PETSC_TRUE; 151 break; 152 case MAT_COLUMNS_UNSORTED: 153 a->sorted = PETSC_FALSE; 154 break; 155 case MAT_KEEP_ZEROED_ROWS: 156 a->keepzeroedrows = PETSC_TRUE; 157 break; 158 case MAT_NO_NEW_NONZERO_LOCATIONS: 159 a->nonew = 1; 160 break; 161 case MAT_NEW_NONZERO_LOCATION_ERR: 162 a->nonew = -1; 163 break; 164 case MAT_NEW_NONZERO_ALLOCATION_ERR: 165 a->nonew = -2; 166 break; 167 case MAT_YES_NEW_NONZERO_LOCATIONS: 168 a->nonew = 0; 169 break; 170 case MAT_ROWS_SORTED: 171 case MAT_ROWS_UNSORTED: 172 case MAT_YES_NEW_DIAGONALS: 173 case MAT_IGNORE_OFF_PROC_ENTRIES: 174 case MAT_USE_HASH_TABLE: 175 ierr = PetscLogInfo((A,"MatSetOption_SeqSBAIJ:Option ignored\n"));CHKERRQ(ierr); 176 break; 177 case MAT_NO_NEW_DIAGONALS: 178 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 179 case MAT_NOT_SYMMETRIC: 180 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 181 case MAT_HERMITIAN: 182 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 183 case MAT_SYMMETRIC: 184 case MAT_STRUCTURALLY_SYMMETRIC: 185 case MAT_NOT_HERMITIAN: 186 case MAT_SYMMETRY_ETERNAL: 187 case MAT_NOT_SYMMETRY_ETERNAL: 188 case MAT_IGNORE_LOWER_TRIANGULAR: 189 a->ignore_ltriangular = PETSC_TRUE; 190 break; 191 case MAT_ERROR_LOWER_TRIANGULAR: 192 a->ignore_ltriangular = PETSC_FALSE; 193 break; 194 default: 195 SETERRQ(PETSC_ERR_SUP,"unknown option"); 196 } 197 PetscFunctionReturn(0); 198 } 199 200 #undef __FUNCT__ 201 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 202 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v) 203 { 204 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 205 PetscErrorCode ierr; 206 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2; 207 MatScalar *aa,*aa_i; 208 PetscScalar *v_i; 209 210 PetscFunctionBegin; 211 bs = A->bs; 212 ai = a->i; 213 aj = a->j; 214 aa = a->a; 215 bs2 = a->bs2; 216 217 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row); 218 219 bn = row/bs; /* Block number */ 220 bp = row % bs; /* Block position */ 221 M = ai[bn+1] - ai[bn]; 222 *ncols = bs*M; 223 224 if (v) { 225 *v = 0; 226 if (*ncols) { 227 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 228 for (i=0; i<M; i++) { /* for each block in the block row */ 229 v_i = *v + i*bs; 230 aa_i = aa + bs2*(ai[bn] + i); 231 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 232 } 233 } 234 } 235 236 if (cols) { 237 *cols = 0; 238 if (*ncols) { 239 ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr); 240 for (i=0; i<M; i++) { /* for each block in the block row */ 241 cols_i = *cols + i*bs; 242 itmp = bs*aj[ai[bn] + i]; 243 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 244 } 245 } 246 } 247 248 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 249 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 250 #ifdef column_search 251 v_i = *v + M*bs; 252 cols_i = *cols + M*bs; 253 for (i=0; i<bn; i++){ /* for each block row */ 254 M = ai[i+1] - ai[i]; 255 for (j=0; j<M; j++){ 256 itmp = aj[ai[i] + j]; /* block column value */ 257 if (itmp == bn){ 258 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 259 for (k=0; k<bs; k++) { 260 *cols_i++ = i*bs+k; 261 *v_i++ = aa_i[k]; 262 } 263 *ncols += bs; 264 break; 265 } 266 } 267 } 268 #endif 269 PetscFunctionReturn(0); 270 } 271 272 #undef __FUNCT__ 273 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 274 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 275 { 276 PetscErrorCode ierr; 277 278 PetscFunctionBegin; 279 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 280 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 281 PetscFunctionReturn(0); 282 } 283 284 #undef __FUNCT__ 285 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 286 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B) 287 { 288 PetscErrorCode ierr; 289 PetscFunctionBegin; 290 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 291 PetscFunctionReturn(0); 292 } 293 294 #undef __FUNCT__ 295 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 296 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 297 { 298 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 299 PetscErrorCode ierr; 300 PetscInt i,j,bs = A->bs,k,l,bs2=a->bs2; 301 char *name; 302 PetscViewerFormat format; 303 304 PetscFunctionBegin; 305 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 306 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 307 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 308 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 309 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 310 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 311 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 312 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 313 for (i=0; i<a->mbs; i++) { 314 for (j=0; j<bs; j++) { 315 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 316 for (k=a->i[i]; k<a->i[i+1]; k++) { 317 for (l=0; l<bs; l++) { 318 #if defined(PETSC_USE_COMPLEX) 319 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 320 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 321 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 322 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 323 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 324 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 325 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 326 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 327 } 328 #else 329 if (a->a[bs2*k + l*bs + j] != 0.0) { 330 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 331 } 332 #endif 333 } 334 } 335 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 336 } 337 } 338 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 339 } else { 340 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 341 for (i=0; i<a->mbs; i++) { 342 for (j=0; j<bs; j++) { 343 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 344 for (k=a->i[i]; k<a->i[i+1]; k++) { 345 for (l=0; l<bs; l++) { 346 #if defined(PETSC_USE_COMPLEX) 347 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 348 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 349 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 350 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 351 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 352 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 353 } else { 354 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 355 } 356 #else 357 ierr = PetscViewerASCIIPrintf(viewer," %D %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 358 #endif 359 } 360 } 361 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 362 } 363 } 364 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 365 } 366 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 367 PetscFunctionReturn(0); 368 } 369 370 #undef __FUNCT__ 371 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 372 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 373 { 374 Mat A = (Mat) Aa; 375 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 376 PetscErrorCode ierr; 377 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2; 378 PetscMPIInt rank; 379 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 380 MatScalar *aa; 381 MPI_Comm comm; 382 PetscViewer viewer; 383 384 PetscFunctionBegin; 385 /* 386 This is nasty. If this is called from an originally parallel matrix 387 then all processes call this,but only the first has the matrix so the 388 rest should return immediately. 389 */ 390 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 391 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 392 if (rank) PetscFunctionReturn(0); 393 394 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 395 396 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 397 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 398 399 /* loop over matrix elements drawing boxes */ 400 color = PETSC_DRAW_BLUE; 401 for (i=0,row=0; i<mbs; i++,row+=bs) { 402 for (j=a->i[i]; j<a->i[i+1]; j++) { 403 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 404 x_l = a->j[j]*bs; x_r = x_l + 1.0; 405 aa = a->a + j*bs2; 406 for (k=0; k<bs; k++) { 407 for (l=0; l<bs; l++) { 408 if (PetscRealPart(*aa++) >= 0.) continue; 409 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 410 } 411 } 412 } 413 } 414 color = PETSC_DRAW_CYAN; 415 for (i=0,row=0; i<mbs; i++,row+=bs) { 416 for (j=a->i[i]; j<a->i[i+1]; j++) { 417 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 418 x_l = a->j[j]*bs; x_r = x_l + 1.0; 419 aa = a->a + j*bs2; 420 for (k=0; k<bs; k++) { 421 for (l=0; l<bs; l++) { 422 if (PetscRealPart(*aa++) != 0.) continue; 423 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 424 } 425 } 426 } 427 } 428 429 color = PETSC_DRAW_RED; 430 for (i=0,row=0; i<mbs; i++,row+=bs) { 431 for (j=a->i[i]; j<a->i[i+1]; j++) { 432 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 433 x_l = a->j[j]*bs; x_r = x_l + 1.0; 434 aa = a->a + j*bs2; 435 for (k=0; k<bs; k++) { 436 for (l=0; l<bs; l++) { 437 if (PetscRealPart(*aa++) <= 0.) continue; 438 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 439 } 440 } 441 } 442 } 443 PetscFunctionReturn(0); 444 } 445 446 #undef __FUNCT__ 447 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 448 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 449 { 450 PetscErrorCode ierr; 451 PetscReal xl,yl,xr,yr,w,h; 452 PetscDraw draw; 453 PetscTruth isnull; 454 455 PetscFunctionBegin; 456 457 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 458 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 459 460 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 461 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 462 xr += w; yr += h; xl = -w; yl = -h; 463 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 464 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 465 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 466 PetscFunctionReturn(0); 467 } 468 469 #undef __FUNCT__ 470 #define __FUNCT__ "MatView_SeqSBAIJ" 471 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 472 { 473 PetscErrorCode ierr; 474 PetscTruth iascii,isdraw; 475 476 PetscFunctionBegin; 477 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 478 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 479 if (iascii){ 480 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 481 } else if (isdraw) { 482 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 483 } else { 484 Mat B; 485 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 486 ierr = MatView(B,viewer);CHKERRQ(ierr); 487 ierr = MatDestroy(B);CHKERRQ(ierr); 488 } 489 PetscFunctionReturn(0); 490 } 491 492 493 #undef __FUNCT__ 494 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 495 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 496 { 497 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 498 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 499 PetscInt *ai = a->i,*ailen = a->ilen; 500 PetscInt brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2; 501 MatScalar *ap,*aa = a->a,zero = 0.0; 502 503 PetscFunctionBegin; 504 for (k=0; k<m; k++) { /* loop over rows */ 505 row = im[k]; brow = row/bs; 506 if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row); 507 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 508 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 509 nrow = ailen[brow]; 510 for (l=0; l<n; l++) { /* loop over columns */ 511 if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]); 512 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1); 513 col = in[l] ; 514 bcol = col/bs; 515 cidx = col%bs; 516 ridx = row%bs; 517 high = nrow; 518 low = 0; /* assume unsorted */ 519 while (high-low > 5) { 520 t = (low+high)/2; 521 if (rp[t] > bcol) high = t; 522 else low = t; 523 } 524 for (i=low; i<high; i++) { 525 if (rp[i] > bcol) break; 526 if (rp[i] == bcol) { 527 *v++ = ap[bs2*i+bs*cidx+ridx]; 528 goto finished; 529 } 530 } 531 *v++ = zero; 532 finished:; 533 } 534 } 535 PetscFunctionReturn(0); 536 } 537 538 539 #undef __FUNCT__ 540 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 541 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 542 { 543 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 544 PetscErrorCode ierr; 545 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 546 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 547 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval; 548 PetscTruth roworiented=a->roworiented; 549 const MatScalar *value = v; 550 MatScalar *ap,*aa = a->a,*bap; 551 552 PetscFunctionBegin; 553 if (roworiented) { 554 stepval = (n-1)*bs; 555 } else { 556 stepval = (m-1)*bs; 557 } 558 for (k=0; k<m; k++) { /* loop over added rows */ 559 row = im[k]; 560 if (row < 0) continue; 561 #if defined(PETSC_USE_DEBUG) 562 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 563 #endif 564 rp = aj + ai[row]; 565 ap = aa + bs2*ai[row]; 566 rmax = imax[row]; 567 nrow = ailen[row]; 568 low = 0; 569 high = nrow; 570 for (l=0; l<n; l++) { /* loop over added columns */ 571 if (in[l] < 0) continue; 572 col = in[l]; 573 #if defined(PETSC_USE_DEBUG) 574 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1); 575 #endif 576 if (col < row) continue; /* ignore lower triangular block */ 577 if (roworiented) { 578 value = v + k*(stepval+bs)*bs + l*bs; 579 } else { 580 value = v + l*(stepval+bs)*bs + k*bs; 581 } 582 if (col < lastcol) low = 0; else high = nrow; 583 lastcol = col; 584 while (high-low > 7) { 585 t = (low+high)/2; 586 if (rp[t] > col) high = t; 587 else low = t; 588 } 589 for (i=low; i<high; i++) { 590 if (rp[i] > col) break; 591 if (rp[i] == col) { 592 bap = ap + bs2*i; 593 if (roworiented) { 594 if (is == ADD_VALUES) { 595 for (ii=0; ii<bs; ii++,value+=stepval) { 596 for (jj=ii; jj<bs2; jj+=bs) { 597 bap[jj] += *value++; 598 } 599 } 600 } else { 601 for (ii=0; ii<bs; ii++,value+=stepval) { 602 for (jj=ii; jj<bs2; jj+=bs) { 603 bap[jj] = *value++; 604 } 605 } 606 } 607 } else { 608 if (is == ADD_VALUES) { 609 for (ii=0; ii<bs; ii++,value+=stepval) { 610 for (jj=0; jj<bs; jj++) { 611 *bap++ += *value++; 612 } 613 } 614 } else { 615 for (ii=0; ii<bs; ii++,value+=stepval) { 616 for (jj=0; jj<bs; jj++) { 617 *bap++ = *value++; 618 } 619 } 620 } 621 } 622 goto noinsert2; 623 } 624 } 625 if (nonew == 1) goto noinsert2; 626 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 627 if (nrow >= rmax) { 628 /* there is no extra room in row, therefore enlarge */ 629 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 630 MatScalar *new_a; 631 632 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 633 634 /* malloc new storage space */ 635 ierr = PetscMalloc3(bs2*new_nz,PetscScalar,&new_a,new_nz,PetscInt,&new_j,a->mbs+1,PetscInt,&new_i);CHKERRQ(ierr); 636 637 /* copy over old data into new slots */ 638 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 639 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 640 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 641 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 642 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 643 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 644 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 645 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 646 /* free up old matrix storage */ 647 if (!a->singlemalloc) { 648 ierr = PetscFree(a->a);CHKERRQ(ierr); 649 ierr = PetscFree(a->i);CHKERRQ(ierr); 650 ierr = PetscFree(a->j);CHKERRQ(ierr); 651 } else { 652 ierr = PetscFree3(a->a,a->i,a->j);CHKERRQ(ierr); 653 } 654 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 655 a->singlemalloc = PETSC_TRUE; 656 657 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 658 rmax = imax[row] = imax[row] + CHUNKSIZE; 659 ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); 660 a->maxnz += bs2*CHUNKSIZE; 661 a->reallocs++; 662 a->nz++; 663 } 664 N = nrow++ - 1; 665 /* shift up all the later entries in this row */ 666 for (ii=N; ii>=i; ii--) { 667 rp[ii+1] = rp[ii]; 668 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 669 } 670 if (N >= i) { 671 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 672 } 673 rp[i] = col; 674 bap = ap + bs2*i; 675 if (roworiented) { 676 for (ii=0; ii<bs; ii++,value+=stepval) { 677 for (jj=ii; jj<bs2; jj+=bs) { 678 bap[jj] = *value++; 679 } 680 } 681 } else { 682 for (ii=0; ii<bs; ii++,value+=stepval) { 683 for (jj=0; jj<bs; jj++) { 684 *bap++ = *value++; 685 } 686 } 687 } 688 noinsert2:; 689 low = i; 690 } 691 ailen[row] = nrow; 692 } 693 PetscFunctionReturn(0); 694 } 695 696 #undef __FUNCT__ 697 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 698 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 699 { 700 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 701 PetscErrorCode ierr; 702 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 703 PetscInt m = A->m,*ip,N,*ailen = a->ilen; 704 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 705 MatScalar *aa = a->a,*ap; 706 707 PetscFunctionBegin; 708 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 709 710 if (m) rmax = ailen[0]; 711 for (i=1; i<mbs; i++) { 712 /* move each row back by the amount of empty slots (fshift) before it*/ 713 fshift += imax[i-1] - ailen[i-1]; 714 rmax = PetscMax(rmax,ailen[i]); 715 if (fshift) { 716 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 717 N = ailen[i]; 718 for (j=0; j<N; j++) { 719 ip[j-fshift] = ip[j]; 720 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 721 } 722 } 723 ai[i] = ai[i-1] + ailen[i-1]; 724 } 725 if (mbs) { 726 fshift += imax[mbs-1] - ailen[mbs-1]; 727 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 728 } 729 /* reset ilen and imax for each row */ 730 for (i=0; i<mbs; i++) { 731 ailen[i] = imax[i] = ai[i+1] - ai[i]; 732 } 733 a->nz = ai[mbs]; 734 735 /* diagonals may have moved, reset it */ 736 if (a->diag) { 737 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 738 } 739 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n", 740 m,A->m,A->bs,fshift*bs2,a->nz*bs2));CHKERRQ(ierr); 741 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs));CHKERRQ(ierr); 742 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %D\n",rmax));CHKERRQ(ierr); 743 a->reallocs = 0; 744 A->info.nz_unneeded = (PetscReal)fshift*bs2; 745 PetscFunctionReturn(0); 746 } 747 748 /* 749 This function returns an array of flags which indicate the locations of contiguous 750 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 751 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 752 Assume: sizes should be long enough to hold all the values. 753 */ 754 #undef __FUNCT__ 755 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 756 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 757 { 758 PetscInt i,j,k,row; 759 PetscTruth flg; 760 761 PetscFunctionBegin; 762 for (i=0,j=0; i<n; j++) { 763 row = idx[i]; 764 if (row%bs!=0) { /* Not the begining of a block */ 765 sizes[j] = 1; 766 i++; 767 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 768 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 769 i++; 770 } else { /* Begining of the block, so check if the complete block exists */ 771 flg = PETSC_TRUE; 772 for (k=1; k<bs; k++) { 773 if (row+k != idx[i+k]) { /* break in the block */ 774 flg = PETSC_FALSE; 775 break; 776 } 777 } 778 if (flg) { /* No break in the bs */ 779 sizes[j] = bs; 780 i+= bs; 781 } else { 782 sizes[j] = 1; 783 i++; 784 } 785 } 786 } 787 *bs_max = j; 788 PetscFunctionReturn(0); 789 } 790 791 792 /* Only add/insert a(i,j) with i<=j (blocks). 793 Any a(i,j) with i>j input by user is ingored. 794 */ 795 796 #undef __FUNCT__ 797 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 798 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 799 { 800 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 801 PetscErrorCode ierr; 802 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 803 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 804 PetscInt *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol; 805 PetscInt ridx,cidx,bs2=a->bs2; 806 MatScalar *ap,value,*aa=a->a,*bap; 807 808 PetscFunctionBegin; 809 for (k=0; k<m; k++) { /* loop over added rows */ 810 row = im[k]; /* row number */ 811 brow = row/bs; /* block row number */ 812 if (row < 0) continue; 813 #if defined(PETSC_USE_DEBUG) 814 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 815 #endif 816 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 817 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 818 rmax = imax[brow]; /* maximum space allocated for this row */ 819 nrow = ailen[brow]; /* actual length of this row */ 820 low = 0; 821 822 for (l=0; l<n; l++) { /* loop over added columns */ 823 if (in[l] < 0) continue; 824 #if defined(PETSC_USE_DEBUG) 825 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->m-1); 826 #endif 827 col = in[l]; 828 bcol = col/bs; /* block col number */ 829 830 if (brow > bcol) { 831 if (a->ignore_ltriangular){ 832 continue; /* ignore lower triangular values */ 833 } else { 834 SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR)"); 835 } 836 } 837 838 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 839 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 840 /* element value a(k,l) */ 841 if (roworiented) { 842 value = v[l + k*n]; 843 } else { 844 value = v[k + l*m]; 845 } 846 847 /* move pointer bap to a(k,l) quickly and add/insert value */ 848 if (col < lastcol) low = 0; high = nrow; 849 lastcol = col; 850 while (high-low > 7) { 851 t = (low+high)/2; 852 if (rp[t] > bcol) high = t; 853 else low = t; 854 } 855 for (i=low; i<high; i++) { 856 if (rp[i] > bcol) break; 857 if (rp[i] == bcol) { 858 bap = ap + bs2*i + bs*cidx + ridx; 859 if (is == ADD_VALUES) *bap += value; 860 else *bap = value; 861 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 862 if (brow == bcol && ridx < cidx){ 863 bap = ap + bs2*i + bs*ridx + cidx; 864 if (is == ADD_VALUES) *bap += value; 865 else *bap = value; 866 } 867 goto noinsert1; 868 } 869 } 870 871 if (nonew == 1) goto noinsert1; 872 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 873 if (nrow >= rmax) { 874 /* there is no extra room in row, therefore enlarge */ 875 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 876 MatScalar *new_a; 877 878 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 879 880 /* Malloc new storage space */ 881 ierr = PetscMalloc3(bs2*new_nz,PetscScalar,&new_a,new_nz,PetscInt,&new_j,a->mbs+1,PetscInt,&new_i);CHKERRQ(ierr); 882 883 /* copy over old data into new slots */ 884 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 885 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 886 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 887 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 888 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 889 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 890 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 891 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 892 /* free up old matrix storage */ 893 if (!a->singlemalloc) { 894 ierr = PetscFree(a->a);CHKERRQ(ierr); 895 ierr = PetscFree(a->i);CHKERRQ(ierr); 896 ierr = PetscFree(a->j);CHKERRQ(ierr); 897 } else { 898 ierr = PetscFree3(a->a,a->i,a->j);CHKERRQ(ierr); 899 } 900 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 901 a->singlemalloc = PETSC_TRUE; 902 903 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 904 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 905 ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); 906 a->maxnz += bs2*CHUNKSIZE; 907 a->reallocs++; 908 a->nz++; 909 } 910 911 N = nrow++ - 1; 912 /* shift up all the later entries in this row */ 913 for (ii=N; ii>=i; ii--) { 914 rp[ii+1] = rp[ii]; 915 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 916 } 917 if (N>=i) { 918 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 919 } 920 rp[i] = bcol; 921 ap[bs2*i + bs*cidx + ridx] = value; 922 noinsert1:; 923 low = i; 924 } 925 } /* end of loop over added columns */ 926 ailen[brow] = nrow; 927 } /* end of loop over added rows */ 928 PetscFunctionReturn(0); 929 } 930 931 EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*); 932 EXTERN PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*); 933 EXTERN PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat,PetscInt,IS[],PetscInt); 934 EXTERN PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,PetscInt,MatReuse,Mat*); 935 EXTERN PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat*[]); 936 EXTERN PetscErrorCode MatScale_SeqSBAIJ(const PetscScalar*,Mat); 937 EXTERN PetscErrorCode MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 938 EXTERN PetscErrorCode MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 939 EXTERN PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat,Vec); 940 EXTERN PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 941 EXTERN PetscErrorCode MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 942 EXTERN PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat); 943 EXTERN PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat,Vec); 944 945 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 946 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 947 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 948 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 949 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 950 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 951 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 952 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 953 954 EXTERN PetscErrorCode MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs); 955 956 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 957 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 958 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 959 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 960 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 961 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 962 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 963 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 964 965 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,MatFactorInfo*,Mat*); 966 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,MatFactorInfo*,Mat*); 967 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,MatFactorInfo*,Mat*); 968 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,MatFactorInfo*,Mat*); 969 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,MatFactorInfo*,Mat*); 970 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,MatFactorInfo*,Mat*); 971 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,MatFactorInfo*,Mat*); 972 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,MatFactorInfo*,Mat*); 973 EXTERN PetscErrorCode MatGetInertia_SeqSBAIJ(Mat,PetscInt*,PetscInt*,PetscInt*); 974 975 EXTERN PetscErrorCode MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 976 EXTERN PetscErrorCode MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 977 EXTERN PetscErrorCode MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 978 EXTERN PetscErrorCode MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 979 EXTERN PetscErrorCode MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 980 EXTERN PetscErrorCode MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 981 EXTERN PetscErrorCode MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 982 EXTERN PetscErrorCode MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 983 984 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 985 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 986 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 987 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 988 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 989 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 990 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 991 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 992 993 #ifdef HAVE_MatICCFactor 994 /* modified from MatILUFactor_SeqSBAIJ, needs further work! */ 995 #undef __FUNCT__ 996 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 997 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 998 { 999 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 1000 Mat outA; 1001 PetscErrorCode ierr; 1002 PetscTruth row_identity,col_identity; 1003 1004 PetscFunctionBegin; 1005 outA = inA; 1006 inA->factor = FACTOR_CHOLESKY; 1007 1008 if (!a->diag) { 1009 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 1010 } 1011 /* 1012 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1013 for ILU(0) factorization with natural ordering 1014 */ 1015 switch (a->bs) { 1016 case 1: 1017 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1018 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1019 inA->ops->solves = MatSolves_SeqSBAIJ_1; 1020 ierr = PetscLoginfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"));CHKERRQ(ierr); 1021 case 2: 1022 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1023 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1024 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1025 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"));CHKERRQ(ierr); 1026 break; 1027 case 3: 1028 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1029 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1030 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1031 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"));CHKERRQ(ierr); 1032 break; 1033 case 4: 1034 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1035 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1036 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1037 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"));CHKERRQ(ierr); 1038 break; 1039 case 5: 1040 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1041 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1042 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1043 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"));CHKERRQ(ierr); 1044 break; 1045 case 6: 1046 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1047 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1048 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1049 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"));CHKERRQ(ierr); 1050 break; 1051 case 7: 1052 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1053 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1054 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1055 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"));CHKERRQ(ierr); 1056 break; 1057 default: 1058 a->row = row; 1059 a->icol = col; 1060 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1061 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1062 1063 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1064 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1065 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 1066 1067 if (!a->solve_work) { 1068 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1069 ierr = PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1070 } 1071 } 1072 1073 ierr = MatCholeskyFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1074 PetscFunctionReturn(0); 1075 } 1076 #endif 1077 1078 #undef __FUNCT__ 1079 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1080 PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat A) 1081 { 1082 static PetscTruth called = PETSC_FALSE; 1083 MPI_Comm comm = A->comm; 1084 PetscErrorCode ierr; 1085 1086 PetscFunctionBegin; 1087 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1088 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1089 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1090 ierr = (*PetscHelpPrintf)(comm," -mat_ignore_ltriangular: Ignore lower triangular values set by user\n");CHKERRQ(ierr); 1091 PetscFunctionReturn(0); 1092 } 1093 1094 EXTERN_C_BEGIN 1095 #undef __FUNCT__ 1096 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1097 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices) 1098 { 1099 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1100 PetscInt i,nz,n; 1101 1102 PetscFunctionBegin; 1103 nz = baij->maxnz; 1104 n = mat->n; 1105 for (i=0; i<nz; i++) { 1106 baij->j[i] = indices[i]; 1107 } 1108 baij->nz = nz; 1109 for (i=0; i<n; i++) { 1110 baij->ilen[i] = baij->imax[i]; 1111 } 1112 PetscFunctionReturn(0); 1113 } 1114 EXTERN_C_END 1115 1116 #undef __FUNCT__ 1117 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1118 /*@ 1119 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1120 in the matrix. 1121 1122 Input Parameters: 1123 + mat - the SeqSBAIJ matrix 1124 - indices - the column indices 1125 1126 Level: advanced 1127 1128 Notes: 1129 This can be called if you have precomputed the nonzero structure of the 1130 matrix and want to provide it to the matrix object to improve the performance 1131 of the MatSetValues() operation. 1132 1133 You MUST have set the correct numbers of nonzeros per row in the call to 1134 MatCreateSeqSBAIJ(), and the columns indices MUST be sorted. 1135 1136 MUST be called before any calls to MatSetValues() 1137 1138 .seealso: MatCreateSeqSBAIJ 1139 @*/ 1140 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1141 { 1142 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1143 1144 PetscFunctionBegin; 1145 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1146 PetscValidPointer(indices,2); 1147 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1148 if (f) { 1149 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1150 } else { 1151 SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices"); 1152 } 1153 PetscFunctionReturn(0); 1154 } 1155 1156 #undef __FUNCT__ 1157 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1158 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A) 1159 { 1160 PetscErrorCode ierr; 1161 1162 PetscFunctionBegin; 1163 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1164 PetscFunctionReturn(0); 1165 } 1166 1167 #undef __FUNCT__ 1168 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1169 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1170 { 1171 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1172 PetscFunctionBegin; 1173 *array = a->a; 1174 PetscFunctionReturn(0); 1175 } 1176 1177 #undef __FUNCT__ 1178 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1179 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1180 { 1181 PetscFunctionBegin; 1182 PetscFunctionReturn(0); 1183 } 1184 1185 #include "petscblaslapack.h" 1186 #undef __FUNCT__ 1187 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1188 PetscErrorCode MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1189 { 1190 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1191 PetscErrorCode ierr; 1192 PetscInt i,bs=Y->bs,bs2,j; 1193 PetscBLASInt bnz = (PetscBLASInt)x->nz,one = 1; 1194 1195 PetscFunctionBegin; 1196 if (str == SAME_NONZERO_PATTERN) { 1197 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1198 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1199 if (y->xtoy && y->XtoY != X) { 1200 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1201 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1202 } 1203 if (!y->xtoy) { /* get xtoy */ 1204 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1205 y->XtoY = X; 1206 } 1207 bs2 = bs*bs; 1208 for (i=0; i<x->nz; i++) { 1209 j = 0; 1210 while (j < bs2){ 1211 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1212 j++; 1213 } 1214 } 1215 ierr = PetscLogInfo((0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr); 1216 } else { 1217 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1218 } 1219 PetscFunctionReturn(0); 1220 } 1221 1222 #undef __FUNCT__ 1223 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1224 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1225 { 1226 PetscFunctionBegin; 1227 *flg = PETSC_TRUE; 1228 PetscFunctionReturn(0); 1229 } 1230 1231 #undef __FUNCT__ 1232 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1233 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1234 { 1235 PetscFunctionBegin; 1236 *flg = PETSC_TRUE; 1237 PetscFunctionReturn(0); 1238 } 1239 1240 #undef __FUNCT__ 1241 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1242 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg) 1243 { 1244 PetscFunctionBegin; 1245 *flg = PETSC_FALSE; 1246 PetscFunctionReturn(0); 1247 } 1248 1249 /* -------------------------------------------------------------------*/ 1250 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1251 MatGetRow_SeqSBAIJ, 1252 MatRestoreRow_SeqSBAIJ, 1253 MatMult_SeqSBAIJ_N, 1254 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1255 MatMult_SeqSBAIJ_N, 1256 MatMultAdd_SeqSBAIJ_N, 1257 MatSolve_SeqSBAIJ_N, 1258 0, 1259 0, 1260 /*10*/ 0, 1261 0, 1262 MatCholeskyFactor_SeqSBAIJ, 1263 MatRelax_SeqSBAIJ, 1264 MatTranspose_SeqSBAIJ, 1265 /*15*/ MatGetInfo_SeqSBAIJ, 1266 MatEqual_SeqSBAIJ, 1267 MatGetDiagonal_SeqSBAIJ, 1268 MatDiagonalScale_SeqSBAIJ, 1269 MatNorm_SeqSBAIJ, 1270 /*20*/ 0, 1271 MatAssemblyEnd_SeqSBAIJ, 1272 0, 1273 MatSetOption_SeqSBAIJ, 1274 MatZeroEntries_SeqSBAIJ, 1275 /*25*/ 0, 1276 0, 1277 0, 1278 MatCholeskyFactorSymbolic_SeqSBAIJ, 1279 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1280 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1281 0, 1282 MatICCFactorSymbolic_SeqSBAIJ, 1283 MatGetArray_SeqSBAIJ, 1284 MatRestoreArray_SeqSBAIJ, 1285 /*35*/ MatDuplicate_SeqSBAIJ, 1286 0, 1287 0, 1288 0, 1289 0, 1290 /*40*/ MatAXPY_SeqSBAIJ, 1291 MatGetSubMatrices_SeqSBAIJ, 1292 MatIncreaseOverlap_SeqSBAIJ, 1293 MatGetValues_SeqSBAIJ, 1294 0, 1295 /*45*/ MatPrintHelp_SeqSBAIJ, 1296 MatScale_SeqSBAIJ, 1297 0, 1298 0, 1299 0, 1300 /*50*/ 0, 1301 MatGetRowIJ_SeqSBAIJ, 1302 MatRestoreRowIJ_SeqSBAIJ, 1303 0, 1304 0, 1305 /*55*/ 0, 1306 0, 1307 0, 1308 0, 1309 MatSetValuesBlocked_SeqSBAIJ, 1310 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1311 0, 1312 0, 1313 MatGetPetscMaps_Petsc, 1314 0, 1315 /*65*/ 0, 1316 0, 1317 0, 1318 0, 1319 0, 1320 /*70*/ MatGetRowMax_SeqSBAIJ, 1321 0, 1322 0, 1323 0, 1324 0, 1325 /*75*/ 0, 1326 0, 1327 0, 1328 0, 1329 0, 1330 /*80*/ 0, 1331 0, 1332 0, 1333 #if !defined(PETSC_USE_COMPLEX) 1334 MatGetInertia_SeqSBAIJ, 1335 #else 1336 0, 1337 #endif 1338 MatLoad_SeqSBAIJ, 1339 /*85*/ MatIsSymmetric_SeqSBAIJ, 1340 MatIsHermitian_SeqSBAIJ, 1341 MatIsStructurallySymmetric_SeqSBAIJ, 1342 0, 1343 0, 1344 /*90*/ 0, 1345 0, 1346 0, 1347 0, 1348 0, 1349 /*95*/ 0, 1350 0, 1351 0, 1352 0}; 1353 1354 EXTERN_C_BEGIN 1355 #undef __FUNCT__ 1356 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1357 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqSBAIJ(Mat mat) 1358 { 1359 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1360 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 1361 PetscErrorCode ierr; 1362 1363 PetscFunctionBegin; 1364 if (aij->nonew != 1) { 1365 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1366 } 1367 1368 /* allocate space for values if not already there */ 1369 if (!aij->saved_values) { 1370 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1371 } 1372 1373 /* copy values over */ 1374 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1375 PetscFunctionReturn(0); 1376 } 1377 EXTERN_C_END 1378 1379 EXTERN_C_BEGIN 1380 #undef __FUNCT__ 1381 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1382 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqSBAIJ(Mat mat) 1383 { 1384 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1385 PetscErrorCode ierr; 1386 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 1387 1388 PetscFunctionBegin; 1389 if (aij->nonew != 1) { 1390 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1391 } 1392 if (!aij->saved_values) { 1393 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1394 } 1395 1396 /* copy values over */ 1397 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1398 PetscFunctionReturn(0); 1399 } 1400 EXTERN_C_END 1401 1402 EXTERN_C_BEGIN 1403 #undef __FUNCT__ 1404 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1405 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 1406 { 1407 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1408 PetscErrorCode ierr; 1409 PetscInt i,mbs,bs2; 1410 PetscTruth skipallocation = PETSC_FALSE,flg; 1411 1412 PetscFunctionBegin; 1413 B->preallocated = PETSC_TRUE; 1414 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1415 mbs = B->m/bs; 1416 bs2 = bs*bs; 1417 1418 if (mbs*bs != B->m) { 1419 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1420 } 1421 1422 if (nz == MAT_SKIP_ALLOCATION) { 1423 skipallocation = PETSC_TRUE; 1424 nz = 0; 1425 } 1426 1427 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1428 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 1429 if (nnz) { 1430 for (i=0; i<mbs; i++) { 1431 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 1432 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs); 1433 } 1434 } 1435 1436 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1437 if (!flg) { 1438 switch (bs) { 1439 case 1: 1440 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1441 B->ops->solve = MatSolve_SeqSBAIJ_1; 1442 B->ops->solves = MatSolves_SeqSBAIJ_1; 1443 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1444 B->ops->mult = MatMult_SeqSBAIJ_1; 1445 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1446 break; 1447 case 2: 1448 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1449 B->ops->solve = MatSolve_SeqSBAIJ_2; 1450 B->ops->solvetranspose = MatSolve_SeqSBAIJ_2; 1451 B->ops->mult = MatMult_SeqSBAIJ_2; 1452 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1453 break; 1454 case 3: 1455 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1456 B->ops->solve = MatSolve_SeqSBAIJ_3; 1457 B->ops->solvetranspose = MatSolve_SeqSBAIJ_3; 1458 B->ops->mult = MatMult_SeqSBAIJ_3; 1459 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1460 break; 1461 case 4: 1462 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1463 B->ops->solve = MatSolve_SeqSBAIJ_4; 1464 B->ops->solvetranspose = MatSolve_SeqSBAIJ_4; 1465 B->ops->mult = MatMult_SeqSBAIJ_4; 1466 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1467 break; 1468 case 5: 1469 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1470 B->ops->solve = MatSolve_SeqSBAIJ_5; 1471 B->ops->solvetranspose = MatSolve_SeqSBAIJ_5; 1472 B->ops->mult = MatMult_SeqSBAIJ_5; 1473 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1474 break; 1475 case 6: 1476 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1477 B->ops->solve = MatSolve_SeqSBAIJ_6; 1478 B->ops->solvetranspose = MatSolve_SeqSBAIJ_6; 1479 B->ops->mult = MatMult_SeqSBAIJ_6; 1480 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1481 break; 1482 case 7: 1483 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1484 B->ops->solve = MatSolve_SeqSBAIJ_7; 1485 B->ops->solvetranspose = MatSolve_SeqSBAIJ_7; 1486 B->ops->mult = MatMult_SeqSBAIJ_7; 1487 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1488 break; 1489 } 1490 } 1491 1492 b->mbs = mbs; 1493 b->nbs = mbs; 1494 if (!skipallocation) { 1495 /* b->ilen will count nonzeros in each block row so far. */ 1496 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 1497 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1498 if (!nnz) { 1499 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1500 else if (nz <= 0) nz = 1; 1501 for (i=0; i<mbs; i++) { 1502 b->imax[i] = nz; 1503 } 1504 nz = nz*mbs; /* total nz */ 1505 } else { 1506 nz = 0; 1507 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1508 } 1509 /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1510 1511 /* allocate the matrix space */ 1512 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->m+1,PetscInt,&b->i);CHKERRQ(ierr); 1513 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1514 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1515 b->singlemalloc = PETSC_TRUE; 1516 1517 /* pointer to beginning of each row */ 1518 b->i[0] = 0; 1519 for (i=1; i<mbs+1; i++) { 1520 b->i[i] = b->i[i-1] + b->imax[i-1]; 1521 } 1522 } 1523 1524 B->bs = bs; 1525 b->bs2 = bs2; 1526 b->nz = 0; 1527 b->maxnz = nz*bs2; 1528 1529 b->inew = 0; 1530 b->jnew = 0; 1531 b->anew = 0; 1532 b->a2anew = 0; 1533 b->permute = PETSC_FALSE; 1534 PetscFunctionReturn(0); 1535 } 1536 EXTERN_C_END 1537 1538 EXTERN_C_BEGIN 1539 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*); 1540 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqBAIJ(Mat,const MatType,MatReuse,Mat*); 1541 EXTERN_C_END 1542 1543 /*MC 1544 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1545 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1546 1547 Options Database Keys: 1548 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1549 1550 Level: beginner 1551 1552 .seealso: MatCreateSeqSBAIJ 1553 M*/ 1554 1555 EXTERN_C_BEGIN 1556 #undef __FUNCT__ 1557 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1558 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqSBAIJ(Mat B) 1559 { 1560 Mat_SeqSBAIJ *b; 1561 PetscErrorCode ierr; 1562 PetscMPIInt size; 1563 PetscTruth flg; 1564 1565 PetscFunctionBegin; 1566 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1567 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1568 B->m = B->M = PetscMax(B->m,B->M); 1569 B->n = B->N = PetscMax(B->n,B->N); 1570 1571 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1572 B->data = (void*)b; 1573 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1574 B->ops->destroy = MatDestroy_SeqSBAIJ; 1575 B->ops->view = MatView_SeqSBAIJ; 1576 B->factor = 0; 1577 B->mapping = 0; 1578 b->row = 0; 1579 b->icol = 0; 1580 b->reallocs = 0; 1581 b->saved_values = 0; 1582 1583 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1584 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1585 1586 b->sorted = PETSC_FALSE; 1587 b->roworiented = PETSC_TRUE; 1588 b->nonew = 0; 1589 b->diag = 0; 1590 b->solve_work = 0; 1591 b->mult_work = 0; 1592 B->spptr = 0; 1593 b->keepzeroedrows = PETSC_FALSE; 1594 b->xtoy = 0; 1595 b->XtoY = 0; 1596 1597 b->inew = 0; 1598 b->jnew = 0; 1599 b->anew = 0; 1600 b->a2anew = 0; 1601 b->permute = PETSC_FALSE; 1602 1603 b->ignore_ltriangular = PETSC_FALSE; 1604 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_ignore_lower_triangular",&flg);CHKERRQ(ierr); 1605 if (flg) b->ignore_ltriangular = PETSC_TRUE; 1606 1607 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1608 "MatStoreValues_SeqSBAIJ", 1609 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1610 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1611 "MatRetrieveValues_SeqSBAIJ", 1612 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1613 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1614 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1615 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1616 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C", 1617 "MatConvert_SeqSBAIJ_SeqAIJ", 1618 MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr); 1619 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C", 1620 "MatConvert_SeqSBAIJ_SeqBAIJ", 1621 MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr); 1622 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1623 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1624 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1625 1626 B->symmetric = PETSC_TRUE; 1627 B->structurally_symmetric = PETSC_TRUE; 1628 B->symmetric_set = PETSC_TRUE; 1629 B->structurally_symmetric_set = PETSC_TRUE; 1630 PetscFunctionReturn(0); 1631 } 1632 EXTERN_C_END 1633 1634 #undef __FUNCT__ 1635 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1636 /*@C 1637 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1638 compressed row) format. For good matrix assembly performance the 1639 user should preallocate the matrix storage by setting the parameter nz 1640 (or the array nnz). By setting these parameters accurately, performance 1641 during matrix assembly can be increased by more than a factor of 50. 1642 1643 Collective on Mat 1644 1645 Input Parameters: 1646 + A - the symmetric matrix 1647 . bs - size of block 1648 . nz - number of block nonzeros per block row (same for all rows) 1649 - nnz - array containing the number of block nonzeros in the upper triangular plus 1650 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1651 1652 Options Database Keys: 1653 . -mat_no_unroll - uses code that does not unroll the loops in the 1654 block calculations (much slower) 1655 . -mat_block_size - size of the blocks to use 1656 1657 Level: intermediate 1658 1659 Notes: 1660 Specify the preallocated storage with either nz or nnz (not both). 1661 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1662 allocation. For additional details, see the users manual chapter on 1663 matrices. 1664 1665 If the nnz parameter is given then the nz parameter is ignored 1666 1667 1668 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1669 @*/ 1670 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 1671 { 1672 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 1673 1674 PetscFunctionBegin; 1675 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1676 if (f) { 1677 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1678 } 1679 PetscFunctionReturn(0); 1680 } 1681 1682 #undef __FUNCT__ 1683 #define __FUNCT__ "MatCreateSeqSBAIJ" 1684 /*@C 1685 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1686 compressed row) format. For good matrix assembly performance the 1687 user should preallocate the matrix storage by setting the parameter nz 1688 (or the array nnz). By setting these parameters accurately, performance 1689 during matrix assembly can be increased by more than a factor of 50. 1690 1691 Collective on MPI_Comm 1692 1693 Input Parameters: 1694 + comm - MPI communicator, set to PETSC_COMM_SELF 1695 . bs - size of block 1696 . m - number of rows, or number of columns 1697 . nz - number of block nonzeros per block row (same for all rows) 1698 - nnz - array containing the number of block nonzeros in the upper triangular plus 1699 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1700 1701 Output Parameter: 1702 . A - the symmetric matrix 1703 1704 Options Database Keys: 1705 . -mat_no_unroll - uses code that does not unroll the loops in the 1706 block calculations (much slower) 1707 . -mat_block_size - size of the blocks to use 1708 1709 Level: intermediate 1710 1711 Notes: 1712 1713 Specify the preallocated storage with either nz or nnz (not both). 1714 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1715 allocation. For additional details, see the users manual chapter on 1716 matrices. 1717 1718 If the nnz parameter is given then the nz parameter is ignored 1719 1720 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1721 @*/ 1722 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1723 { 1724 PetscErrorCode ierr; 1725 1726 PetscFunctionBegin; 1727 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1728 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1729 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 1730 PetscFunctionReturn(0); 1731 } 1732 1733 #undef __FUNCT__ 1734 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1735 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1736 { 1737 Mat C; 1738 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1739 PetscErrorCode ierr; 1740 PetscInt i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1741 1742 PetscFunctionBegin; 1743 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1744 1745 *B = 0; 1746 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1747 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1748 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1749 c = (Mat_SeqSBAIJ*)C->data; 1750 1751 C->preallocated = PETSC_TRUE; 1752 C->factor = A->factor; 1753 c->row = 0; 1754 c->icol = 0; 1755 c->saved_values = 0; 1756 c->keepzeroedrows = a->keepzeroedrows; 1757 C->assembled = PETSC_TRUE; 1758 1759 C->M = A->M; 1760 C->N = A->N; 1761 C->bs = A->bs; 1762 c->bs2 = a->bs2; 1763 c->mbs = a->mbs; 1764 c->nbs = a->nbs; 1765 1766 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr); 1767 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr); 1768 for (i=0; i<mbs; i++) { 1769 c->imax[i] = a->imax[i]; 1770 c->ilen[i] = a->ilen[i]; 1771 } 1772 1773 /* allocate the matrix space */ 1774 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 1775 c->singlemalloc = PETSC_TRUE; 1776 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1777 if (mbs > 0) { 1778 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1779 if (cpvalues == MAT_COPY_VALUES) { 1780 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1781 } else { 1782 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1783 } 1784 } 1785 1786 ierr = PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1787 c->sorted = a->sorted; 1788 c->roworiented = a->roworiented; 1789 c->nonew = a->nonew; 1790 1791 if (a->diag) { 1792 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 1793 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1794 for (i=0; i<mbs; i++) { 1795 c->diag[i] = a->diag[i]; 1796 } 1797 } else c->diag = 0; 1798 c->nz = a->nz; 1799 c->maxnz = a->maxnz; 1800 c->solve_work = 0; 1801 c->mult_work = 0; 1802 *B = C; 1803 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1804 PetscFunctionReturn(0); 1805 } 1806 1807 #undef __FUNCT__ 1808 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1809 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer,const MatType type,Mat *A) 1810 { 1811 Mat_SeqSBAIJ *a; 1812 Mat B; 1813 PetscErrorCode ierr; 1814 int fd; 1815 PetscMPIInt size; 1816 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 1817 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1818 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 1819 PetscInt *masked,nmask,tmp,bs2,ishift; 1820 PetscScalar *aa; 1821 MPI_Comm comm = ((PetscObject)viewer)->comm; 1822 1823 PetscFunctionBegin; 1824 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1825 bs2 = bs*bs; 1826 1827 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1828 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1829 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1830 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1831 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1832 M = header[1]; N = header[2]; nz = header[3]; 1833 1834 if (header[3] < 0) { 1835 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1836 } 1837 1838 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1839 1840 /* 1841 This code adds extra rows to make sure the number of rows is 1842 divisible by the blocksize 1843 */ 1844 mbs = M/bs; 1845 extra_rows = bs - M + bs*(mbs); 1846 if (extra_rows == bs) extra_rows = 0; 1847 else mbs++; 1848 if (extra_rows) { 1849 ierr = PetscLogInfo((0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr); 1850 } 1851 1852 /* read in row lengths */ 1853 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1854 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1855 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1856 1857 /* read in column indices */ 1858 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1859 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1860 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1861 1862 /* loop over row lengths determining block row lengths */ 1863 ierr = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr); 1864 ierr = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1865 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 1866 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1867 masked = mask + mbs; 1868 rowcount = 0; nzcount = 0; 1869 for (i=0; i<mbs; i++) { 1870 nmask = 0; 1871 for (j=0; j<bs; j++) { 1872 kmax = rowlengths[rowcount]; 1873 for (k=0; k<kmax; k++) { 1874 tmp = jj[nzcount++]/bs; /* block col. index */ 1875 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1876 } 1877 rowcount++; 1878 } 1879 s_browlengths[i] += nmask; 1880 1881 /* zero out the mask elements we set */ 1882 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1883 } 1884 1885 /* create our matrix */ 1886 ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr); 1887 ierr = MatSetType(B,type);CHKERRQ(ierr); 1888 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,s_browlengths);CHKERRQ(ierr); 1889 a = (Mat_SeqSBAIJ*)B->data; 1890 1891 /* set matrix "i" values */ 1892 a->i[0] = 0; 1893 for (i=1; i<= mbs; i++) { 1894 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1895 a->ilen[i-1] = s_browlengths[i-1]; 1896 } 1897 a->nz = a->i[mbs]; 1898 1899 /* read in nonzero values */ 1900 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1901 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1902 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1903 1904 /* set "a" and "j" values into matrix */ 1905 nzcount = 0; jcount = 0; 1906 for (i=0; i<mbs; i++) { 1907 nzcountb = nzcount; 1908 nmask = 0; 1909 for (j=0; j<bs; j++) { 1910 kmax = rowlengths[i*bs+j]; 1911 for (k=0; k<kmax; k++) { 1912 tmp = jj[nzcount++]/bs; /* block col. index */ 1913 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1914 } 1915 } 1916 /* sort the masked values */ 1917 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1918 1919 /* set "j" values into matrix */ 1920 maskcount = 1; 1921 for (j=0; j<nmask; j++) { 1922 a->j[jcount++] = masked[j]; 1923 mask[masked[j]] = maskcount++; 1924 } 1925 1926 /* set "a" values into matrix */ 1927 ishift = bs2*a->i[i]; 1928 for (j=0; j<bs; j++) { 1929 kmax = rowlengths[i*bs+j]; 1930 for (k=0; k<kmax; k++) { 1931 tmp = jj[nzcountb]/bs ; /* block col. index */ 1932 if (tmp >= i){ 1933 block = mask[tmp] - 1; 1934 point = jj[nzcountb] - bs*tmp; 1935 idx = ishift + bs2*block + j + bs*point; 1936 a->a[idx] = aa[nzcountb]; 1937 } 1938 nzcountb++; 1939 } 1940 } 1941 /* zero out the mask elements we set */ 1942 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1943 } 1944 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1945 1946 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1947 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1948 ierr = PetscFree(aa);CHKERRQ(ierr); 1949 ierr = PetscFree(jj);CHKERRQ(ierr); 1950 ierr = PetscFree(mask);CHKERRQ(ierr); 1951 1952 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1953 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1954 ierr = MatView_Private(B);CHKERRQ(ierr); 1955 *A = B; 1956 PetscFunctionReturn(0); 1957 } 1958 1959 #undef __FUNCT__ 1960 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1961 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1962 { 1963 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1964 MatScalar *aa=a->a,*v,*v1; 1965 PetscScalar *x,*b,*t,sum,d; 1966 PetscErrorCode ierr; 1967 PetscInt m=a->mbs,bs=A->bs,*ai=a->i,*aj=a->j; 1968 PetscInt nz,nz1,*vj,*vj1,i; 1969 1970 PetscFunctionBegin; 1971 its = its*lits; 1972 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1973 1974 if (bs > 1) 1975 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1976 1977 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1978 if (xx != bb) { 1979 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1980 } else { 1981 b = x; 1982 } 1983 1984 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1985 1986 if (flag & SOR_ZERO_INITIAL_GUESS) { 1987 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1988 for (i=0; i<m; i++) 1989 t[i] = b[i]; 1990 1991 for (i=0; i<m; i++){ 1992 d = *(aa + ai[i]); /* diag[i] */ 1993 v = aa + ai[i] + 1; 1994 vj = aj + ai[i] + 1; 1995 nz = ai[i+1] - ai[i] - 1; 1996 x[i] = omega*t[i]/d; 1997 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 1998 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 1999 } 2000 } 2001 2002 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2003 for (i=0; i<m; i++) 2004 t[i] = b[i]; 2005 2006 for (i=0; i<m-1; i++){ /* update rhs */ 2007 v = aa + ai[i] + 1; 2008 vj = aj + ai[i] + 1; 2009 nz = ai[i+1] - ai[i] - 1; 2010 while (nz--) t[*vj++] -= x[i]*(*v++); 2011 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2012 } 2013 for (i=m-1; i>=0; i--){ 2014 d = *(aa + ai[i]); 2015 v = aa + ai[i] + 1; 2016 vj = aj + ai[i] + 1; 2017 nz = ai[i+1] - ai[i] - 1; 2018 sum = t[i]; 2019 while (nz--) sum -= x[*vj++]*(*v++); 2020 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2021 x[i] = (1-omega)*x[i] + omega*sum/d; 2022 } 2023 } 2024 its--; 2025 } 2026 2027 while (its--) { 2028 /* 2029 forward sweep: 2030 for i=0,...,m-1: 2031 sum[i] = (b[i] - U(i,:)x )/d[i]; 2032 x[i] = (1-omega)x[i] + omega*sum[i]; 2033 b = b - x[i]*U^T(i,:); 2034 2035 */ 2036 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2037 for (i=0; i<m; i++) 2038 t[i] = b[i]; 2039 2040 for (i=0; i<m; i++){ 2041 d = *(aa + ai[i]); /* diag[i] */ 2042 v = aa + ai[i] + 1; v1=v; 2043 vj = aj + ai[i] + 1; vj1=vj; 2044 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2045 sum = t[i]; 2046 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2047 x[i] = (1-omega)*x[i] + omega*sum/d; 2048 while (nz--) t[*vj++] -= x[i]*(*v++); 2049 ierr = PetscLogFlops(4*nz-2);CHKERRQ(ierr); 2050 } 2051 } 2052 2053 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2054 /* 2055 backward sweep: 2056 b = b - x[i]*U^T(i,:), i=0,...,n-2 2057 for i=m-1,...,0: 2058 sum[i] = (b[i] - U(i,:)x )/d[i]; 2059 x[i] = (1-omega)x[i] + omega*sum[i]; 2060 */ 2061 for (i=0; i<m; i++) 2062 t[i] = b[i]; 2063 2064 for (i=0; i<m-1; i++){ /* update rhs */ 2065 v = aa + ai[i] + 1; 2066 vj = aj + ai[i] + 1; 2067 nz = ai[i+1] - ai[i] - 1; 2068 while (nz--) t[*vj++] -= x[i]*(*v++); 2069 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2070 } 2071 for (i=m-1; i>=0; i--){ 2072 d = *(aa + ai[i]); 2073 v = aa + ai[i] + 1; 2074 vj = aj + ai[i] + 1; 2075 nz = ai[i+1] - ai[i] - 1; 2076 sum = t[i]; 2077 while (nz--) sum -= x[*vj++]*(*v++); 2078 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2079 x[i] = (1-omega)*x[i] + omega*sum/d; 2080 } 2081 } 2082 } 2083 2084 ierr = PetscFree(t);CHKERRQ(ierr); 2085 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2086 if (bb != xx) { 2087 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2088 } 2089 PetscFunctionReturn(0); 2090 } 2091 2092 2093 2094 2095 2096 2097