1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the SBAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/inline/spops.h" 9 #include "src/mat/impls/sbaij/seq/sbaij.h" 10 11 #define CHUNKSIZE 10 12 13 /* 14 Checks for missing diagonals 15 */ 16 #undef __FUNCT__ 17 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 18 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A) 19 { 20 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 21 PetscErrorCode ierr; 22 PetscInt *diag,*jj = a->j,i; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 for (i=0; i<a->mbs; i++) { 28 if (jj[diag[i]] != i) SETERRQ1(PETSC_ERR_ARG_CORRUPT,"Matrix is missing diagonal number %D",i); 29 } 30 PetscFunctionReturn(0); 31 } 32 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 35 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A) 36 { 37 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 38 PetscErrorCode ierr; 39 PetscInt i,mbs = a->mbs; 40 41 PetscFunctionBegin; 42 if (a->diag) PetscFunctionReturn(0); 43 44 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 45 ierr = PetscLogObjectMemory(A,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 46 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 47 PetscFunctionReturn(0); 48 } 49 50 #undef __FUNCT__ 51 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 52 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 53 { 54 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 55 PetscInt n = a->mbs,i; 56 57 PetscFunctionBegin; 58 *nn = n; 59 if (!ia) PetscFunctionReturn(0); 60 61 if (oshift == 1) { 62 /* temporarily add 1 to i and j indices */ 63 PetscInt nz = a->i[n]; 64 for (i=0; i<nz; i++) a->j[i]++; 65 for (i=0; i<n+1; i++) a->i[i]++; 66 *ia = a->i; *ja = a->j; 67 } else { 68 *ia = a->i; *ja = a->j; 69 } 70 PetscFunctionReturn(0); 71 } 72 73 #undef __FUNCT__ 74 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 75 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 76 { 77 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 78 PetscInt i,n = a->mbs; 79 80 PetscFunctionBegin; 81 if (!ia) PetscFunctionReturn(0); 82 83 if (oshift == 1) { 84 PetscInt nz = a->i[n]-1; 85 for (i=0; i<nz; i++) a->j[i]--; 86 for (i=0; i<n+1; i++) a->i[i]--; 87 } 88 PetscFunctionReturn(0); 89 } 90 91 #undef __FUNCT__ 92 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 93 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A) 94 { 95 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 96 PetscErrorCode ierr; 97 98 PetscFunctionBegin; 99 PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->m,a->nz); 100 if (!a->singlemalloc) { 101 ierr = PetscFree(a->a);CHKERRQ(ierr); 102 ierr = PetscFree(a->i);CHKERRQ(ierr); 103 ierr = PetscFree(a->j);CHKERRQ(ierr); 104 } else { 105 ierr = PetscFree3(a->a,a->i,a->j);CHKERRQ(ierr); 106 } 107 if (a->row) { 108 ierr = ISDestroy(a->row);CHKERRQ(ierr); 109 } 110 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 111 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 112 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 113 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 114 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 115 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 116 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 117 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 118 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 119 120 if (a->inew){ 121 ierr = PetscFree(a->inew);CHKERRQ(ierr); 122 a->inew = 0; 123 } 124 ierr = PetscFree(a);CHKERRQ(ierr); 125 126 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 127 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 128 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 129 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 130 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr); 131 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 132 PetscFunctionReturn(0); 133 } 134 135 #undef __FUNCT__ 136 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 137 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op) 138 { 139 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 140 PetscErrorCode ierr; 141 142 PetscFunctionBegin; 143 switch (op) { 144 case MAT_ROW_ORIENTED: 145 a->roworiented = PETSC_TRUE; 146 break; 147 case MAT_COLUMN_ORIENTED: 148 a->roworiented = PETSC_FALSE; 149 break; 150 case MAT_COLUMNS_SORTED: 151 a->sorted = PETSC_TRUE; 152 break; 153 case MAT_COLUMNS_UNSORTED: 154 a->sorted = PETSC_FALSE; 155 break; 156 case MAT_KEEP_ZEROED_ROWS: 157 a->keepzeroedrows = PETSC_TRUE; 158 break; 159 case MAT_NO_NEW_NONZERO_LOCATIONS: 160 a->nonew = 1; 161 break; 162 case MAT_NEW_NONZERO_LOCATION_ERR: 163 a->nonew = -1; 164 break; 165 case MAT_NEW_NONZERO_ALLOCATION_ERR: 166 a->nonew = -2; 167 break; 168 case MAT_YES_NEW_NONZERO_LOCATIONS: 169 a->nonew = 0; 170 break; 171 case MAT_ROWS_SORTED: 172 case MAT_ROWS_UNSORTED: 173 case MAT_YES_NEW_DIAGONALS: 174 case MAT_IGNORE_OFF_PROC_ENTRIES: 175 case MAT_USE_HASH_TABLE: 176 ierr = PetscLogInfo((A,"MatSetOption_SeqSBAIJ:Option ignored\n"));CHKERRQ(ierr); 177 break; 178 case MAT_NO_NEW_DIAGONALS: 179 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 180 case MAT_NOT_SYMMETRIC: 181 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 182 case MAT_HERMITIAN: 183 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 184 case MAT_SYMMETRIC: 185 case MAT_STRUCTURALLY_SYMMETRIC: 186 case MAT_NOT_HERMITIAN: 187 case MAT_SYMMETRY_ETERNAL: 188 case MAT_NOT_SYMMETRY_ETERNAL: 189 case MAT_IGNORE_LOWER_TRIANGULAR: 190 a->ignore_ltriangular = PETSC_TRUE; 191 break; 192 case MAT_ERROR_LOWER_TRIANGULAR: 193 a->ignore_ltriangular = PETSC_FALSE; 194 break; 195 default: 196 SETERRQ(PETSC_ERR_SUP,"unknown option"); 197 } 198 PetscFunctionReturn(0); 199 } 200 201 #undef __FUNCT__ 202 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 203 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v) 204 { 205 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 206 PetscErrorCode ierr; 207 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2; 208 MatScalar *aa,*aa_i; 209 PetscScalar *v_i; 210 211 PetscFunctionBegin; 212 bs = A->bs; 213 ai = a->i; 214 aj = a->j; 215 aa = a->a; 216 bs2 = a->bs2; 217 218 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row); 219 220 bn = row/bs; /* Block number */ 221 bp = row % bs; /* Block position */ 222 M = ai[bn+1] - ai[bn]; 223 *ncols = bs*M; 224 225 if (v) { 226 *v = 0; 227 if (*ncols) { 228 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 229 for (i=0; i<M; i++) { /* for each block in the block row */ 230 v_i = *v + i*bs; 231 aa_i = aa + bs2*(ai[bn] + i); 232 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 233 } 234 } 235 } 236 237 if (cols) { 238 *cols = 0; 239 if (*ncols) { 240 ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr); 241 for (i=0; i<M; i++) { /* for each block in the block row */ 242 cols_i = *cols + i*bs; 243 itmp = bs*aj[ai[bn] + i]; 244 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 245 } 246 } 247 } 248 249 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 250 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 251 #ifdef column_search 252 v_i = *v + M*bs; 253 cols_i = *cols + M*bs; 254 for (i=0; i<bn; i++){ /* for each block row */ 255 M = ai[i+1] - ai[i]; 256 for (j=0; j<M; j++){ 257 itmp = aj[ai[i] + j]; /* block column value */ 258 if (itmp == bn){ 259 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 260 for (k=0; k<bs; k++) { 261 *cols_i++ = i*bs+k; 262 *v_i++ = aa_i[k]; 263 } 264 *ncols += bs; 265 break; 266 } 267 } 268 } 269 #endif 270 PetscFunctionReturn(0); 271 } 272 273 #undef __FUNCT__ 274 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 275 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 276 { 277 PetscErrorCode ierr; 278 279 PetscFunctionBegin; 280 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 281 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 282 PetscFunctionReturn(0); 283 } 284 285 #undef __FUNCT__ 286 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 287 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B) 288 { 289 PetscErrorCode ierr; 290 PetscFunctionBegin; 291 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 292 PetscFunctionReturn(0); 293 } 294 295 #undef __FUNCT__ 296 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 297 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 298 { 299 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 300 PetscErrorCode ierr; 301 PetscInt i,j,bs = A->bs,k,l,bs2=a->bs2; 302 char *name; 303 PetscViewerFormat format; 304 305 PetscFunctionBegin; 306 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 307 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 308 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 309 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 310 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 311 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 312 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 313 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 314 for (i=0; i<a->mbs; i++) { 315 for (j=0; j<bs; j++) { 316 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 317 for (k=a->i[i]; k<a->i[i+1]; k++) { 318 for (l=0; l<bs; l++) { 319 #if defined(PETSC_USE_COMPLEX) 320 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 321 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 322 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 323 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 324 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 325 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 326 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 327 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 328 } 329 #else 330 if (a->a[bs2*k + l*bs + j] != 0.0) { 331 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 332 } 333 #endif 334 } 335 } 336 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 337 } 338 } 339 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 340 } else { 341 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 342 for (i=0; i<a->mbs; i++) { 343 for (j=0; j<bs; j++) { 344 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 345 for (k=a->i[i]; k<a->i[i+1]; k++) { 346 for (l=0; l<bs; l++) { 347 #if defined(PETSC_USE_COMPLEX) 348 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 349 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 350 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 351 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 352 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 353 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 354 } else { 355 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 356 } 357 #else 358 ierr = PetscViewerASCIIPrintf(viewer," %D %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 359 #endif 360 } 361 } 362 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 363 } 364 } 365 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 366 } 367 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 368 PetscFunctionReturn(0); 369 } 370 371 #undef __FUNCT__ 372 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 373 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 374 { 375 Mat A = (Mat) Aa; 376 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 377 PetscErrorCode ierr; 378 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2; 379 PetscMPIInt rank; 380 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 381 MatScalar *aa; 382 MPI_Comm comm; 383 PetscViewer viewer; 384 385 PetscFunctionBegin; 386 /* 387 This is nasty. If this is called from an originally parallel matrix 388 then all processes call this,but only the first has the matrix so the 389 rest should return immediately. 390 */ 391 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 392 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 393 if (rank) PetscFunctionReturn(0); 394 395 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 396 397 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 398 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 399 400 /* loop over matrix elements drawing boxes */ 401 color = PETSC_DRAW_BLUE; 402 for (i=0,row=0; i<mbs; i++,row+=bs) { 403 for (j=a->i[i]; j<a->i[i+1]; j++) { 404 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 405 x_l = a->j[j]*bs; x_r = x_l + 1.0; 406 aa = a->a + j*bs2; 407 for (k=0; k<bs; k++) { 408 for (l=0; l<bs; l++) { 409 if (PetscRealPart(*aa++) >= 0.) continue; 410 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 411 } 412 } 413 } 414 } 415 color = PETSC_DRAW_CYAN; 416 for (i=0,row=0; i<mbs; i++,row+=bs) { 417 for (j=a->i[i]; j<a->i[i+1]; j++) { 418 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 419 x_l = a->j[j]*bs; x_r = x_l + 1.0; 420 aa = a->a + j*bs2; 421 for (k=0; k<bs; k++) { 422 for (l=0; l<bs; l++) { 423 if (PetscRealPart(*aa++) != 0.) continue; 424 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 425 } 426 } 427 } 428 } 429 430 color = PETSC_DRAW_RED; 431 for (i=0,row=0; i<mbs; i++,row+=bs) { 432 for (j=a->i[i]; j<a->i[i+1]; j++) { 433 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 434 x_l = a->j[j]*bs; x_r = x_l + 1.0; 435 aa = a->a + j*bs2; 436 for (k=0; k<bs; k++) { 437 for (l=0; l<bs; l++) { 438 if (PetscRealPart(*aa++) <= 0.) continue; 439 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 440 } 441 } 442 } 443 } 444 PetscFunctionReturn(0); 445 } 446 447 #undef __FUNCT__ 448 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 449 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 450 { 451 PetscErrorCode ierr; 452 PetscReal xl,yl,xr,yr,w,h; 453 PetscDraw draw; 454 PetscTruth isnull; 455 456 PetscFunctionBegin; 457 458 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 459 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 460 461 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 462 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 463 xr += w; yr += h; xl = -w; yl = -h; 464 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 465 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 466 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 467 PetscFunctionReturn(0); 468 } 469 470 #undef __FUNCT__ 471 #define __FUNCT__ "MatView_SeqSBAIJ" 472 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 473 { 474 PetscErrorCode ierr; 475 PetscTruth iascii,isdraw; 476 477 PetscFunctionBegin; 478 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 479 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 480 if (iascii){ 481 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 482 } else if (isdraw) { 483 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 484 } else { 485 Mat B; 486 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 487 ierr = MatView(B,viewer);CHKERRQ(ierr); 488 ierr = MatDestroy(B);CHKERRQ(ierr); 489 } 490 PetscFunctionReturn(0); 491 } 492 493 494 #undef __FUNCT__ 495 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 496 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 497 { 498 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 499 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 500 PetscInt *ai = a->i,*ailen = a->ilen; 501 PetscInt brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2; 502 MatScalar *ap,*aa = a->a,zero = 0.0; 503 504 PetscFunctionBegin; 505 for (k=0; k<m; k++) { /* loop over rows */ 506 row = im[k]; brow = row/bs; 507 if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row); 508 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 509 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 510 nrow = ailen[brow]; 511 for (l=0; l<n; l++) { /* loop over columns */ 512 if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]); 513 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1); 514 col = in[l] ; 515 bcol = col/bs; 516 cidx = col%bs; 517 ridx = row%bs; 518 high = nrow; 519 low = 0; /* assume unsorted */ 520 while (high-low > 5) { 521 t = (low+high)/2; 522 if (rp[t] > bcol) high = t; 523 else low = t; 524 } 525 for (i=low; i<high; i++) { 526 if (rp[i] > bcol) break; 527 if (rp[i] == bcol) { 528 *v++ = ap[bs2*i+bs*cidx+ridx]; 529 goto finished; 530 } 531 } 532 *v++ = zero; 533 finished:; 534 } 535 } 536 PetscFunctionReturn(0); 537 } 538 539 540 #undef __FUNCT__ 541 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 542 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 543 { 544 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 545 PetscErrorCode ierr; 546 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 547 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 548 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval; 549 PetscTruth roworiented=a->roworiented; 550 const MatScalar *value = v; 551 MatScalar *ap,*aa = a->a,*bap; 552 553 PetscFunctionBegin; 554 if (roworiented) { 555 stepval = (n-1)*bs; 556 } else { 557 stepval = (m-1)*bs; 558 } 559 for (k=0; k<m; k++) { /* loop over added rows */ 560 row = im[k]; 561 if (row < 0) continue; 562 #if defined(PETSC_USE_DEBUG) 563 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 564 #endif 565 rp = aj + ai[row]; 566 ap = aa + bs2*ai[row]; 567 rmax = imax[row]; 568 nrow = ailen[row]; 569 low = 0; 570 high = nrow; 571 for (l=0; l<n; l++) { /* loop over added columns */ 572 if (in[l] < 0) continue; 573 col = in[l]; 574 #if defined(PETSC_USE_DEBUG) 575 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1); 576 #endif 577 if (col < row) continue; /* ignore lower triangular block */ 578 if (roworiented) { 579 value = v + k*(stepval+bs)*bs + l*bs; 580 } else { 581 value = v + l*(stepval+bs)*bs + k*bs; 582 } 583 if (col < lastcol) low = 0; else high = nrow; 584 lastcol = col; 585 while (high-low > 7) { 586 t = (low+high)/2; 587 if (rp[t] > col) high = t; 588 else low = t; 589 } 590 for (i=low; i<high; i++) { 591 if (rp[i] > col) break; 592 if (rp[i] == col) { 593 bap = ap + bs2*i; 594 if (roworiented) { 595 if (is == ADD_VALUES) { 596 for (ii=0; ii<bs; ii++,value+=stepval) { 597 for (jj=ii; jj<bs2; jj+=bs) { 598 bap[jj] += *value++; 599 } 600 } 601 } else { 602 for (ii=0; ii<bs; ii++,value+=stepval) { 603 for (jj=ii; jj<bs2; jj+=bs) { 604 bap[jj] = *value++; 605 } 606 } 607 } 608 } else { 609 if (is == ADD_VALUES) { 610 for (ii=0; ii<bs; ii++,value+=stepval) { 611 for (jj=0; jj<bs; jj++) { 612 *bap++ += *value++; 613 } 614 } 615 } else { 616 for (ii=0; ii<bs; ii++,value+=stepval) { 617 for (jj=0; jj<bs; jj++) { 618 *bap++ = *value++; 619 } 620 } 621 } 622 } 623 goto noinsert2; 624 } 625 } 626 if (nonew == 1) goto noinsert2; 627 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 628 if (nrow >= rmax) { 629 /* there is no extra room in row, therefore enlarge */ 630 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 631 MatScalar *new_a; 632 633 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 634 635 /* malloc new storage space */ 636 ierr = PetscMalloc3(bs2*new_nz,PetscScalar,&new_a,new_nz,PetscInt,&new_j,a->mbs+1,PetscInt,&new_i);CHKERRQ(ierr); 637 638 /* copy over old data into new slots */ 639 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 640 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 641 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 642 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 643 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 644 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 645 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 646 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 647 /* free up old matrix storage */ 648 if (!a->singlemalloc) { 649 ierr = PetscFree(a->a);CHKERRQ(ierr); 650 ierr = PetscFree(a->i);CHKERRQ(ierr); 651 ierr = PetscFree(a->j);CHKERRQ(ierr); 652 } else { 653 ierr = PetscFree3(a->a,a->i,a->j);CHKERRQ(ierr); 654 } 655 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 656 a->singlemalloc = PETSC_TRUE; 657 658 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 659 rmax = imax[row] = imax[row] + CHUNKSIZE; 660 ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); 661 a->maxnz += bs2*CHUNKSIZE; 662 a->reallocs++; 663 a->nz++; 664 } 665 N = nrow++ - 1; 666 /* shift up all the later entries in this row */ 667 for (ii=N; ii>=i; ii--) { 668 rp[ii+1] = rp[ii]; 669 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 670 } 671 if (N >= i) { 672 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 673 } 674 rp[i] = col; 675 bap = ap + bs2*i; 676 if (roworiented) { 677 for (ii=0; ii<bs; ii++,value+=stepval) { 678 for (jj=ii; jj<bs2; jj+=bs) { 679 bap[jj] = *value++; 680 } 681 } 682 } else { 683 for (ii=0; ii<bs; ii++,value+=stepval) { 684 for (jj=0; jj<bs; jj++) { 685 *bap++ = *value++; 686 } 687 } 688 } 689 noinsert2:; 690 low = i; 691 } 692 ailen[row] = nrow; 693 } 694 PetscFunctionReturn(0); 695 } 696 697 #undef __FUNCT__ 698 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 699 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 700 { 701 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 702 PetscErrorCode ierr; 703 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 704 PetscInt m = A->m,*ip,N,*ailen = a->ilen; 705 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 706 MatScalar *aa = a->a,*ap; 707 708 PetscFunctionBegin; 709 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 710 711 if (m) rmax = ailen[0]; 712 for (i=1; i<mbs; i++) { 713 /* move each row back by the amount of empty slots (fshift) before it*/ 714 fshift += imax[i-1] - ailen[i-1]; 715 rmax = PetscMax(rmax,ailen[i]); 716 if (fshift) { 717 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 718 N = ailen[i]; 719 for (j=0; j<N; j++) { 720 ip[j-fshift] = ip[j]; 721 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 722 } 723 } 724 ai[i] = ai[i-1] + ailen[i-1]; 725 } 726 if (mbs) { 727 fshift += imax[mbs-1] - ailen[mbs-1]; 728 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 729 } 730 /* reset ilen and imax for each row */ 731 for (i=0; i<mbs; i++) { 732 ailen[i] = imax[i] = ai[i+1] - ai[i]; 733 } 734 a->nz = ai[mbs]; 735 736 /* diagonals may have moved, reset it */ 737 if (a->diag) { 738 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 739 } 740 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n", 741 m,A->m,A->bs,fshift*bs2,a->nz*bs2));CHKERRQ(ierr); 742 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs));CHKERRQ(ierr); 743 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %D\n",rmax));CHKERRQ(ierr); 744 a->reallocs = 0; 745 A->info.nz_unneeded = (PetscReal)fshift*bs2; 746 PetscFunctionReturn(0); 747 } 748 749 /* 750 This function returns an array of flags which indicate the locations of contiguous 751 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 752 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 753 Assume: sizes should be long enough to hold all the values. 754 */ 755 #undef __FUNCT__ 756 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 757 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 758 { 759 PetscInt i,j,k,row; 760 PetscTruth flg; 761 762 PetscFunctionBegin; 763 for (i=0,j=0; i<n; j++) { 764 row = idx[i]; 765 if (row%bs!=0) { /* Not the begining of a block */ 766 sizes[j] = 1; 767 i++; 768 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 769 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 770 i++; 771 } else { /* Begining of the block, so check if the complete block exists */ 772 flg = PETSC_TRUE; 773 for (k=1; k<bs; k++) { 774 if (row+k != idx[i+k]) { /* break in the block */ 775 flg = PETSC_FALSE; 776 break; 777 } 778 } 779 if (flg) { /* No break in the bs */ 780 sizes[j] = bs; 781 i+= bs; 782 } else { 783 sizes[j] = 1; 784 i++; 785 } 786 } 787 } 788 *bs_max = j; 789 PetscFunctionReturn(0); 790 } 791 792 793 /* Only add/insert a(i,j) with i<=j (blocks). 794 Any a(i,j) with i>j input by user is ingored. 795 */ 796 797 #undef __FUNCT__ 798 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 799 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 800 { 801 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 802 PetscErrorCode ierr; 803 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 804 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 805 PetscInt *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol; 806 PetscInt ridx,cidx,bs2=a->bs2; 807 MatScalar *ap,value,*aa=a->a,*bap; 808 809 PetscFunctionBegin; 810 for (k=0; k<m; k++) { /* loop over added rows */ 811 row = im[k]; /* row number */ 812 brow = row/bs; /* block row number */ 813 if (row < 0) continue; 814 #if defined(PETSC_USE_DEBUG) 815 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 816 #endif 817 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 818 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 819 rmax = imax[brow]; /* maximum space allocated for this row */ 820 nrow = ailen[brow]; /* actual length of this row */ 821 low = 0; 822 823 for (l=0; l<n; l++) { /* loop over added columns */ 824 if (in[l] < 0) continue; 825 #if defined(PETSC_USE_DEBUG) 826 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->m-1); 827 #endif 828 col = in[l]; 829 bcol = col/bs; /* block col number */ 830 831 if (brow > bcol) { 832 if (a->ignore_ltriangular){ 833 continue; /* ignore lower triangular values */ 834 } else { 835 SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR)"); 836 } 837 } 838 839 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 840 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 841 /* element value a(k,l) */ 842 if (roworiented) { 843 value = v[l + k*n]; 844 } else { 845 value = v[k + l*m]; 846 } 847 848 /* move pointer bap to a(k,l) quickly and add/insert value */ 849 if (col < lastcol) low = 0; high = nrow; 850 lastcol = col; 851 while (high-low > 7) { 852 t = (low+high)/2; 853 if (rp[t] > bcol) high = t; 854 else low = t; 855 } 856 for (i=low; i<high; i++) { 857 if (rp[i] > bcol) break; 858 if (rp[i] == bcol) { 859 bap = ap + bs2*i + bs*cidx + ridx; 860 if (is == ADD_VALUES) *bap += value; 861 else *bap = value; 862 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 863 if (brow == bcol && ridx < cidx){ 864 bap = ap + bs2*i + bs*ridx + cidx; 865 if (is == ADD_VALUES) *bap += value; 866 else *bap = value; 867 } 868 goto noinsert1; 869 } 870 } 871 872 if (nonew == 1) goto noinsert1; 873 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 874 if (nrow >= rmax) { 875 /* there is no extra room in row, therefore enlarge */ 876 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 877 MatScalar *new_a; 878 879 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 880 881 /* Malloc new storage space */ 882 ierr = PetscMalloc3(bs2*new_nz,PetscScalar,&new_a,new_nz,PetscInt,&new_j,a->mbs+1,PetscInt,&new_i);CHKERRQ(ierr); 883 884 /* copy over old data into new slots */ 885 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 886 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 887 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 888 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 889 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 890 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 891 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 892 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 893 /* free up old matrix storage */ 894 if (!a->singlemalloc) { 895 ierr = PetscFree(a->a);CHKERRQ(ierr); 896 ierr = PetscFree(a->i);CHKERRQ(ierr); 897 ierr = PetscFree(a->j);CHKERRQ(ierr); 898 } else { 899 ierr = PetscFree3(a->a,a->i,a->j);CHKERRQ(ierr); 900 } 901 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 902 a->singlemalloc = PETSC_TRUE; 903 904 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 905 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 906 ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); 907 a->maxnz += bs2*CHUNKSIZE; 908 a->reallocs++; 909 a->nz++; 910 } 911 912 N = nrow++ - 1; 913 /* shift up all the later entries in this row */ 914 for (ii=N; ii>=i; ii--) { 915 rp[ii+1] = rp[ii]; 916 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 917 } 918 if (N>=i) { 919 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 920 } 921 rp[i] = bcol; 922 ap[bs2*i + bs*cidx + ridx] = value; 923 noinsert1:; 924 low = i; 925 } 926 } /* end of loop over added columns */ 927 ailen[brow] = nrow; 928 } /* end of loop over added rows */ 929 PetscFunctionReturn(0); 930 } 931 932 EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*); 933 EXTERN PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*); 934 EXTERN PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat,PetscInt,IS[],PetscInt); 935 EXTERN PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,PetscInt,MatReuse,Mat*); 936 EXTERN PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat*[]); 937 EXTERN PetscErrorCode MatScale_SeqSBAIJ(const PetscScalar*,Mat); 938 EXTERN PetscErrorCode MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 939 EXTERN PetscErrorCode MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 940 EXTERN PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat,Vec); 941 EXTERN PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 942 EXTERN PetscErrorCode MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 943 EXTERN PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat); 944 EXTERN PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat,Vec); 945 946 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 947 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 948 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 949 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 950 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 951 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 952 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 953 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 954 955 EXTERN PetscErrorCode MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs); 956 957 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 958 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 959 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 960 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 961 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 962 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 963 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 964 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 965 966 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,MatFactorInfo*,Mat*); 967 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,MatFactorInfo*,Mat*); 968 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,MatFactorInfo*,Mat*); 969 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,MatFactorInfo*,Mat*); 970 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,MatFactorInfo*,Mat*); 971 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,MatFactorInfo*,Mat*); 972 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,MatFactorInfo*,Mat*); 973 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,MatFactorInfo*,Mat*); 974 EXTERN PetscErrorCode MatGetInertia_SeqSBAIJ(Mat,PetscInt*,PetscInt*,PetscInt*); 975 976 EXTERN PetscErrorCode MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 977 EXTERN PetscErrorCode MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 978 EXTERN PetscErrorCode MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 979 EXTERN PetscErrorCode MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 980 EXTERN PetscErrorCode MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 981 EXTERN PetscErrorCode MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 982 EXTERN PetscErrorCode MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 983 EXTERN PetscErrorCode MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 984 985 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 986 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 987 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 988 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 989 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 990 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 991 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 992 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 993 994 #ifdef HAVE_MatICCFactor 995 /* modified from MatILUFactor_SeqSBAIJ, needs further work! */ 996 #undef __FUNCT__ 997 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 998 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 999 { 1000 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 1001 Mat outA; 1002 PetscErrorCode ierr; 1003 PetscTruth row_identity,col_identity; 1004 1005 PetscFunctionBegin; 1006 outA = inA; 1007 inA->factor = FACTOR_CHOLESKY; 1008 1009 if (!a->diag) { 1010 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 1011 } 1012 /* 1013 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1014 for ILU(0) factorization with natural ordering 1015 */ 1016 switch (a->bs) { 1017 case 1: 1018 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1019 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1020 inA->ops->solves = MatSolves_SeqSBAIJ_1; 1021 ierr = PetscLoginfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"));CHKERRQ(ierr); 1022 case 2: 1023 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1024 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1025 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1026 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"));CHKERRQ(ierr); 1027 break; 1028 case 3: 1029 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1030 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1031 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1032 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"));CHKERRQ(ierr); 1033 break; 1034 case 4: 1035 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1036 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1037 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1038 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"));CHKERRQ(ierr); 1039 break; 1040 case 5: 1041 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1042 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1043 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1044 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"));CHKERRQ(ierr); 1045 break; 1046 case 6: 1047 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1048 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1049 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1050 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"));CHKERRQ(ierr); 1051 break; 1052 case 7: 1053 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1054 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1055 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1056 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"));CHKERRQ(ierr); 1057 break; 1058 default: 1059 a->row = row; 1060 a->icol = col; 1061 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1062 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1063 1064 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1065 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1066 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 1067 1068 if (!a->solve_work) { 1069 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1070 ierr = PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1071 } 1072 } 1073 1074 ierr = MatCholeskyFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1075 PetscFunctionReturn(0); 1076 } 1077 #endif 1078 1079 #undef __FUNCT__ 1080 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1081 PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat A) 1082 { 1083 static PetscTruth called = PETSC_FALSE; 1084 MPI_Comm comm = A->comm; 1085 PetscErrorCode ierr; 1086 1087 PetscFunctionBegin; 1088 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1089 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1090 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1091 ierr = (*PetscHelpPrintf)(comm," -mat_ignore_ltriangular: Ignore lower triangular values set by user\n");CHKERRQ(ierr); 1092 PetscFunctionReturn(0); 1093 } 1094 1095 EXTERN_C_BEGIN 1096 #undef __FUNCT__ 1097 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1098 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices) 1099 { 1100 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1101 PetscInt i,nz,n; 1102 1103 PetscFunctionBegin; 1104 nz = baij->maxnz; 1105 n = mat->n; 1106 for (i=0; i<nz; i++) { 1107 baij->j[i] = indices[i]; 1108 } 1109 baij->nz = nz; 1110 for (i=0; i<n; i++) { 1111 baij->ilen[i] = baij->imax[i]; 1112 } 1113 PetscFunctionReturn(0); 1114 } 1115 EXTERN_C_END 1116 1117 #undef __FUNCT__ 1118 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1119 /*@ 1120 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1121 in the matrix. 1122 1123 Input Parameters: 1124 + mat - the SeqSBAIJ matrix 1125 - indices - the column indices 1126 1127 Level: advanced 1128 1129 Notes: 1130 This can be called if you have precomputed the nonzero structure of the 1131 matrix and want to provide it to the matrix object to improve the performance 1132 of the MatSetValues() operation. 1133 1134 You MUST have set the correct numbers of nonzeros per row in the call to 1135 MatCreateSeqSBAIJ(), and the columns indices MUST be sorted. 1136 1137 MUST be called before any calls to MatSetValues() 1138 1139 .seealso: MatCreateSeqSBAIJ 1140 @*/ 1141 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1142 { 1143 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1144 1145 PetscFunctionBegin; 1146 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1147 PetscValidPointer(indices,2); 1148 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1149 if (f) { 1150 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1151 } else { 1152 SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices"); 1153 } 1154 PetscFunctionReturn(0); 1155 } 1156 1157 #undef __FUNCT__ 1158 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1159 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A) 1160 { 1161 PetscErrorCode ierr; 1162 1163 PetscFunctionBegin; 1164 ierr = MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1165 PetscFunctionReturn(0); 1166 } 1167 1168 #undef __FUNCT__ 1169 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1170 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1171 { 1172 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1173 PetscFunctionBegin; 1174 *array = a->a; 1175 PetscFunctionReturn(0); 1176 } 1177 1178 #undef __FUNCT__ 1179 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1180 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1181 { 1182 PetscFunctionBegin; 1183 PetscFunctionReturn(0); 1184 } 1185 1186 #include "petscblaslapack.h" 1187 #undef __FUNCT__ 1188 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1189 PetscErrorCode MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1190 { 1191 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1192 PetscErrorCode ierr; 1193 PetscInt i,bs=Y->bs,bs2,j; 1194 PetscBLASInt bnz = (PetscBLASInt)x->nz,one = 1; 1195 1196 PetscFunctionBegin; 1197 if (str == SAME_NONZERO_PATTERN) { 1198 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1199 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1200 if (y->xtoy && y->XtoY != X) { 1201 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1202 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1203 } 1204 if (!y->xtoy) { /* get xtoy */ 1205 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1206 y->XtoY = X; 1207 } 1208 bs2 = bs*bs; 1209 for (i=0; i<x->nz; i++) { 1210 j = 0; 1211 while (j < bs2){ 1212 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1213 j++; 1214 } 1215 } 1216 ierr = PetscLogInfo((0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr); 1217 } else { 1218 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1219 } 1220 PetscFunctionReturn(0); 1221 } 1222 1223 #undef __FUNCT__ 1224 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1225 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1226 { 1227 PetscFunctionBegin; 1228 *flg = PETSC_TRUE; 1229 PetscFunctionReturn(0); 1230 } 1231 1232 #undef __FUNCT__ 1233 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1234 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1235 { 1236 PetscFunctionBegin; 1237 *flg = PETSC_TRUE; 1238 PetscFunctionReturn(0); 1239 } 1240 1241 #undef __FUNCT__ 1242 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1243 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg) 1244 { 1245 PetscFunctionBegin; 1246 *flg = PETSC_FALSE; 1247 PetscFunctionReturn(0); 1248 } 1249 1250 /* -------------------------------------------------------------------*/ 1251 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1252 MatGetRow_SeqSBAIJ, 1253 MatRestoreRow_SeqSBAIJ, 1254 MatMult_SeqSBAIJ_N, 1255 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1256 MatMult_SeqSBAIJ_N, 1257 MatMultAdd_SeqSBAIJ_N, 1258 MatSolve_SeqSBAIJ_N, 1259 0, 1260 0, 1261 /*10*/ 0, 1262 0, 1263 MatCholeskyFactor_SeqSBAIJ, 1264 MatRelax_SeqSBAIJ, 1265 MatTranspose_SeqSBAIJ, 1266 /*15*/ MatGetInfo_SeqSBAIJ, 1267 MatEqual_SeqSBAIJ, 1268 MatGetDiagonal_SeqSBAIJ, 1269 MatDiagonalScale_SeqSBAIJ, 1270 MatNorm_SeqSBAIJ, 1271 /*20*/ 0, 1272 MatAssemblyEnd_SeqSBAIJ, 1273 0, 1274 MatSetOption_SeqSBAIJ, 1275 MatZeroEntries_SeqSBAIJ, 1276 /*25*/ 0, 1277 0, 1278 0, 1279 MatCholeskyFactorSymbolic_SeqSBAIJ, 1280 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1281 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1282 0, 1283 MatICCFactorSymbolic_SeqSBAIJ, 1284 MatGetArray_SeqSBAIJ, 1285 MatRestoreArray_SeqSBAIJ, 1286 /*35*/ MatDuplicate_SeqSBAIJ, 1287 0, 1288 0, 1289 0, 1290 0, 1291 /*40*/ MatAXPY_SeqSBAIJ, 1292 MatGetSubMatrices_SeqSBAIJ, 1293 MatIncreaseOverlap_SeqSBAIJ, 1294 MatGetValues_SeqSBAIJ, 1295 0, 1296 /*45*/ MatPrintHelp_SeqSBAIJ, 1297 MatScale_SeqSBAIJ, 1298 0, 1299 0, 1300 0, 1301 /*50*/ 0, 1302 MatGetRowIJ_SeqSBAIJ, 1303 MatRestoreRowIJ_SeqSBAIJ, 1304 0, 1305 0, 1306 /*55*/ 0, 1307 0, 1308 0, 1309 0, 1310 MatSetValuesBlocked_SeqSBAIJ, 1311 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1312 0, 1313 0, 1314 MatGetPetscMaps_Petsc, 1315 0, 1316 /*65*/ 0, 1317 0, 1318 0, 1319 0, 1320 0, 1321 /*70*/ MatGetRowMax_SeqSBAIJ, 1322 0, 1323 0, 1324 0, 1325 0, 1326 /*75*/ 0, 1327 0, 1328 0, 1329 0, 1330 0, 1331 /*80*/ 0, 1332 0, 1333 0, 1334 #if !defined(PETSC_USE_COMPLEX) 1335 MatGetInertia_SeqSBAIJ, 1336 #else 1337 0, 1338 #endif 1339 MatLoad_SeqSBAIJ, 1340 /*85*/ MatIsSymmetric_SeqSBAIJ, 1341 MatIsHermitian_SeqSBAIJ, 1342 MatIsStructurallySymmetric_SeqSBAIJ, 1343 0, 1344 0, 1345 /*90*/ 0, 1346 0, 1347 0, 1348 0, 1349 0, 1350 /*95*/ 0, 1351 0, 1352 0, 1353 0}; 1354 1355 EXTERN_C_BEGIN 1356 #undef __FUNCT__ 1357 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1358 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqSBAIJ(Mat mat) 1359 { 1360 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1361 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 1362 PetscErrorCode ierr; 1363 1364 PetscFunctionBegin; 1365 if (aij->nonew != 1) { 1366 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1367 } 1368 1369 /* allocate space for values if not already there */ 1370 if (!aij->saved_values) { 1371 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1372 } 1373 1374 /* copy values over */ 1375 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1376 PetscFunctionReturn(0); 1377 } 1378 EXTERN_C_END 1379 1380 EXTERN_C_BEGIN 1381 #undef __FUNCT__ 1382 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1383 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqSBAIJ(Mat mat) 1384 { 1385 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1386 PetscErrorCode ierr; 1387 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 1388 1389 PetscFunctionBegin; 1390 if (aij->nonew != 1) { 1391 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1392 } 1393 if (!aij->saved_values) { 1394 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1395 } 1396 1397 /* copy values over */ 1398 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1399 PetscFunctionReturn(0); 1400 } 1401 EXTERN_C_END 1402 1403 EXTERN_C_BEGIN 1404 #undef __FUNCT__ 1405 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1406 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 1407 { 1408 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1409 PetscErrorCode ierr; 1410 PetscInt i,len,mbs,bs2; 1411 PetscTruth flg; 1412 1413 PetscFunctionBegin; 1414 B->preallocated = PETSC_TRUE; 1415 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1416 mbs = B->m/bs; 1417 bs2 = bs*bs; 1418 1419 if (mbs*bs != B->m) { 1420 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1421 } 1422 1423 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1424 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 1425 if (nnz) { 1426 for (i=0; i<mbs; i++) { 1427 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 1428 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs); 1429 } 1430 } 1431 1432 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1433 if (!flg) { 1434 switch (bs) { 1435 case 1: 1436 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1437 B->ops->solve = MatSolve_SeqSBAIJ_1; 1438 B->ops->solves = MatSolves_SeqSBAIJ_1; 1439 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1440 B->ops->mult = MatMult_SeqSBAIJ_1; 1441 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1442 break; 1443 case 2: 1444 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1445 B->ops->solve = MatSolve_SeqSBAIJ_2; 1446 B->ops->solvetranspose = MatSolve_SeqSBAIJ_2; 1447 B->ops->mult = MatMult_SeqSBAIJ_2; 1448 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1449 break; 1450 case 3: 1451 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1452 B->ops->solve = MatSolve_SeqSBAIJ_3; 1453 B->ops->solvetranspose = MatSolve_SeqSBAIJ_3; 1454 B->ops->mult = MatMult_SeqSBAIJ_3; 1455 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1456 break; 1457 case 4: 1458 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1459 B->ops->solve = MatSolve_SeqSBAIJ_4; 1460 B->ops->solvetranspose = MatSolve_SeqSBAIJ_4; 1461 B->ops->mult = MatMult_SeqSBAIJ_4; 1462 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1463 break; 1464 case 5: 1465 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1466 B->ops->solve = MatSolve_SeqSBAIJ_5; 1467 B->ops->solvetranspose = MatSolve_SeqSBAIJ_5; 1468 B->ops->mult = MatMult_SeqSBAIJ_5; 1469 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1470 break; 1471 case 6: 1472 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1473 B->ops->solve = MatSolve_SeqSBAIJ_6; 1474 B->ops->solvetranspose = MatSolve_SeqSBAIJ_6; 1475 B->ops->mult = MatMult_SeqSBAIJ_6; 1476 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1477 break; 1478 case 7: 1479 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1480 B->ops->solve = MatSolve_SeqSBAIJ_7; 1481 B->ops->solvetranspose = MatSolve_SeqSBAIJ_7; 1482 B->ops->mult = MatMult_SeqSBAIJ_7; 1483 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1484 break; 1485 } 1486 } 1487 1488 b->mbs = mbs; 1489 b->nbs = mbs; 1490 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->imax);CHKERRQ(ierr); 1491 if (!nnz) { 1492 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1493 else if (nz <= 0) nz = 1; 1494 for (i=0; i<mbs; i++) { 1495 b->imax[i] = nz; 1496 } 1497 nz = nz*mbs; /* total nz */ 1498 } else { 1499 nz = 0; 1500 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1501 } 1502 /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1503 1504 /* allocate the matrix space */ 1505 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->m+1,PetscInt,&b->i);CHKERRQ(ierr); 1506 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1507 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1508 b->singlemalloc = PETSC_TRUE; 1509 1510 /* pointer to beginning of each row */ 1511 b->i[0] = 0; 1512 for (i=1; i<mbs+1; i++) { 1513 b->i[i] = b->i[i-1] + b->imax[i-1]; 1514 } 1515 1516 /* b->ilen will count nonzeros in each block row so far. */ 1517 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->ilen);CHKERRQ(ierr); 1518 ierr = PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1519 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1520 1521 B->bs = bs; 1522 b->bs2 = bs2; 1523 b->nz = 0; 1524 b->maxnz = nz*bs2; 1525 1526 b->inew = 0; 1527 b->jnew = 0; 1528 b->anew = 0; 1529 b->a2anew = 0; 1530 b->permute = PETSC_FALSE; 1531 PetscFunctionReturn(0); 1532 } 1533 EXTERN_C_END 1534 1535 EXTERN_C_BEGIN 1536 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*); 1537 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqBAIJ(Mat,const MatType,MatReuse,Mat*); 1538 EXTERN_C_END 1539 1540 /*MC 1541 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1542 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1543 1544 Options Database Keys: 1545 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1546 1547 Level: beginner 1548 1549 .seealso: MatCreateSeqSBAIJ 1550 M*/ 1551 1552 EXTERN_C_BEGIN 1553 #undef __FUNCT__ 1554 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1555 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqSBAIJ(Mat B) 1556 { 1557 Mat_SeqSBAIJ *b; 1558 PetscErrorCode ierr; 1559 PetscMPIInt size; 1560 PetscTruth flg; 1561 1562 PetscFunctionBegin; 1563 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1564 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1565 B->m = B->M = PetscMax(B->m,B->M); 1566 B->n = B->N = PetscMax(B->n,B->N); 1567 1568 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1569 B->data = (void*)b; 1570 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1571 B->ops->destroy = MatDestroy_SeqSBAIJ; 1572 B->ops->view = MatView_SeqSBAIJ; 1573 B->factor = 0; 1574 B->mapping = 0; 1575 b->row = 0; 1576 b->icol = 0; 1577 b->reallocs = 0; 1578 b->saved_values = 0; 1579 1580 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1581 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1582 1583 b->sorted = PETSC_FALSE; 1584 b->roworiented = PETSC_TRUE; 1585 b->nonew = 0; 1586 b->diag = 0; 1587 b->solve_work = 0; 1588 b->mult_work = 0; 1589 B->spptr = 0; 1590 b->keepzeroedrows = PETSC_FALSE; 1591 b->xtoy = 0; 1592 b->XtoY = 0; 1593 1594 b->inew = 0; 1595 b->jnew = 0; 1596 b->anew = 0; 1597 b->a2anew = 0; 1598 b->permute = PETSC_FALSE; 1599 1600 b->ignore_ltriangular = PETSC_FALSE; 1601 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_ignore_lower_triangular",&flg);CHKERRQ(ierr); 1602 if (flg) b->ignore_ltriangular = PETSC_TRUE; 1603 1604 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1605 "MatStoreValues_SeqSBAIJ", 1606 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1607 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1608 "MatRetrieveValues_SeqSBAIJ", 1609 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1610 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1611 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1612 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1613 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C", 1614 "MatConvert_SeqSBAIJ_SeqAIJ", 1615 MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr); 1616 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C", 1617 "MatConvert_SeqSBAIJ_SeqBAIJ", 1618 MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr); 1619 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1620 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1621 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1622 1623 B->symmetric = PETSC_TRUE; 1624 B->structurally_symmetric = PETSC_TRUE; 1625 B->symmetric_set = PETSC_TRUE; 1626 B->structurally_symmetric_set = PETSC_TRUE; 1627 PetscFunctionReturn(0); 1628 } 1629 EXTERN_C_END 1630 1631 #undef __FUNCT__ 1632 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1633 /*@C 1634 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1635 compressed row) format. For good matrix assembly performance the 1636 user should preallocate the matrix storage by setting the parameter nz 1637 (or the array nnz). By setting these parameters accurately, performance 1638 during matrix assembly can be increased by more than a factor of 50. 1639 1640 Collective on Mat 1641 1642 Input Parameters: 1643 + A - the symmetric matrix 1644 . bs - size of block 1645 . nz - number of block nonzeros per block row (same for all rows) 1646 - nnz - array containing the number of block nonzeros in the upper triangular plus 1647 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1648 1649 Options Database Keys: 1650 . -mat_no_unroll - uses code that does not unroll the loops in the 1651 block calculations (much slower) 1652 . -mat_block_size - size of the blocks to use 1653 1654 Level: intermediate 1655 1656 Notes: 1657 Specify the preallocated storage with either nz or nnz (not both). 1658 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1659 allocation. For additional details, see the users manual chapter on 1660 matrices. 1661 1662 If the nnz parameter is given then the nz parameter is ignored 1663 1664 1665 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1666 @*/ 1667 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 1668 { 1669 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 1670 1671 PetscFunctionBegin; 1672 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1673 if (f) { 1674 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1675 } 1676 PetscFunctionReturn(0); 1677 } 1678 1679 #undef __FUNCT__ 1680 #define __FUNCT__ "MatCreateSeqSBAIJ" 1681 /*@C 1682 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1683 compressed row) format. For good matrix assembly performance the 1684 user should preallocate the matrix storage by setting the parameter nz 1685 (or the array nnz). By setting these parameters accurately, performance 1686 during matrix assembly can be increased by more than a factor of 50. 1687 1688 Collective on MPI_Comm 1689 1690 Input Parameters: 1691 + comm - MPI communicator, set to PETSC_COMM_SELF 1692 . bs - size of block 1693 . m - number of rows, or number of columns 1694 . nz - number of block nonzeros per block row (same for all rows) 1695 - nnz - array containing the number of block nonzeros in the upper triangular plus 1696 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1697 1698 Output Parameter: 1699 . A - the symmetric matrix 1700 1701 Options Database Keys: 1702 . -mat_no_unroll - uses code that does not unroll the loops in the 1703 block calculations (much slower) 1704 . -mat_block_size - size of the blocks to use 1705 1706 Level: intermediate 1707 1708 Notes: 1709 1710 Specify the preallocated storage with either nz or nnz (not both). 1711 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1712 allocation. For additional details, see the users manual chapter on 1713 matrices. 1714 1715 If the nnz parameter is given then the nz parameter is ignored 1716 1717 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1718 @*/ 1719 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1720 { 1721 PetscErrorCode ierr; 1722 1723 PetscFunctionBegin; 1724 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1725 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1726 ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1727 PetscFunctionReturn(0); 1728 } 1729 1730 #undef __FUNCT__ 1731 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1732 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1733 { 1734 Mat C; 1735 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1736 PetscErrorCode ierr; 1737 PetscInt i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1738 1739 PetscFunctionBegin; 1740 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1741 1742 *B = 0; 1743 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1744 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1745 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1746 c = (Mat_SeqSBAIJ*)C->data; 1747 1748 C->preallocated = PETSC_TRUE; 1749 C->factor = A->factor; 1750 c->row = 0; 1751 c->icol = 0; 1752 c->saved_values = 0; 1753 c->keepzeroedrows = a->keepzeroedrows; 1754 C->assembled = PETSC_TRUE; 1755 1756 C->M = A->M; 1757 C->N = A->N; 1758 C->bs = A->bs; 1759 c->bs2 = a->bs2; 1760 c->mbs = a->mbs; 1761 c->nbs = a->nbs; 1762 1763 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr); 1764 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr); 1765 for (i=0; i<mbs; i++) { 1766 c->imax[i] = a->imax[i]; 1767 c->ilen[i] = a->ilen[i]; 1768 } 1769 1770 /* allocate the matrix space */ 1771 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 1772 c->singlemalloc = PETSC_TRUE; 1773 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1774 if (mbs > 0) { 1775 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1776 if (cpvalues == MAT_COPY_VALUES) { 1777 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1778 } else { 1779 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1780 } 1781 } 1782 1783 ierr = PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1784 c->sorted = a->sorted; 1785 c->roworiented = a->roworiented; 1786 c->nonew = a->nonew; 1787 1788 if (a->diag) { 1789 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 1790 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1791 for (i=0; i<mbs; i++) { 1792 c->diag[i] = a->diag[i]; 1793 } 1794 } else c->diag = 0; 1795 c->nz = a->nz; 1796 c->maxnz = a->maxnz; 1797 c->solve_work = 0; 1798 c->mult_work = 0; 1799 *B = C; 1800 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1801 PetscFunctionReturn(0); 1802 } 1803 1804 #undef __FUNCT__ 1805 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1806 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer,const MatType type,Mat *A) 1807 { 1808 Mat_SeqSBAIJ *a; 1809 Mat B; 1810 PetscErrorCode ierr; 1811 int fd; 1812 PetscMPIInt size; 1813 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 1814 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1815 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 1816 PetscInt *masked,nmask,tmp,bs2,ishift; 1817 PetscScalar *aa; 1818 MPI_Comm comm = ((PetscObject)viewer)->comm; 1819 1820 PetscFunctionBegin; 1821 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1822 bs2 = bs*bs; 1823 1824 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1825 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1826 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1827 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1828 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1829 M = header[1]; N = header[2]; nz = header[3]; 1830 1831 if (header[3] < 0) { 1832 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1833 } 1834 1835 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1836 1837 /* 1838 This code adds extra rows to make sure the number of rows is 1839 divisible by the blocksize 1840 */ 1841 mbs = M/bs; 1842 extra_rows = bs - M + bs*(mbs); 1843 if (extra_rows == bs) extra_rows = 0; 1844 else mbs++; 1845 if (extra_rows) { 1846 ierr = PetscLogInfo((0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr); 1847 } 1848 1849 /* read in row lengths */ 1850 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1851 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1852 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1853 1854 /* read in column indices */ 1855 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1856 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1857 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1858 1859 /* loop over row lengths determining block row lengths */ 1860 ierr = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr); 1861 ierr = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1862 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 1863 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1864 masked = mask + mbs; 1865 rowcount = 0; nzcount = 0; 1866 for (i=0; i<mbs; i++) { 1867 nmask = 0; 1868 for (j=0; j<bs; j++) { 1869 kmax = rowlengths[rowcount]; 1870 for (k=0; k<kmax; k++) { 1871 tmp = jj[nzcount++]/bs; /* block col. index */ 1872 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1873 } 1874 rowcount++; 1875 } 1876 s_browlengths[i] += nmask; 1877 1878 /* zero out the mask elements we set */ 1879 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1880 } 1881 1882 /* create our matrix */ 1883 ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr); 1884 ierr = MatSetType(B,type);CHKERRQ(ierr); 1885 ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr); 1886 a = (Mat_SeqSBAIJ*)B->data; 1887 1888 /* set matrix "i" values */ 1889 a->i[0] = 0; 1890 for (i=1; i<= mbs; i++) { 1891 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1892 a->ilen[i-1] = s_browlengths[i-1]; 1893 } 1894 a->nz = a->i[mbs]; 1895 1896 /* read in nonzero values */ 1897 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1898 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1899 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1900 1901 /* set "a" and "j" values into matrix */ 1902 nzcount = 0; jcount = 0; 1903 for (i=0; i<mbs; i++) { 1904 nzcountb = nzcount; 1905 nmask = 0; 1906 for (j=0; j<bs; j++) { 1907 kmax = rowlengths[i*bs+j]; 1908 for (k=0; k<kmax; k++) { 1909 tmp = jj[nzcount++]/bs; /* block col. index */ 1910 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1911 } 1912 } 1913 /* sort the masked values */ 1914 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1915 1916 /* set "j" values into matrix */ 1917 maskcount = 1; 1918 for (j=0; j<nmask; j++) { 1919 a->j[jcount++] = masked[j]; 1920 mask[masked[j]] = maskcount++; 1921 } 1922 1923 /* set "a" values into matrix */ 1924 ishift = bs2*a->i[i]; 1925 for (j=0; j<bs; j++) { 1926 kmax = rowlengths[i*bs+j]; 1927 for (k=0; k<kmax; k++) { 1928 tmp = jj[nzcountb]/bs ; /* block col. index */ 1929 if (tmp >= i){ 1930 block = mask[tmp] - 1; 1931 point = jj[nzcountb] - bs*tmp; 1932 idx = ishift + bs2*block + j + bs*point; 1933 a->a[idx] = aa[nzcountb]; 1934 } 1935 nzcountb++; 1936 } 1937 } 1938 /* zero out the mask elements we set */ 1939 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1940 } 1941 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1942 1943 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1944 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1945 ierr = PetscFree(aa);CHKERRQ(ierr); 1946 ierr = PetscFree(jj);CHKERRQ(ierr); 1947 ierr = PetscFree(mask);CHKERRQ(ierr); 1948 1949 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1950 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1951 ierr = MatView_Private(B);CHKERRQ(ierr); 1952 *A = B; 1953 PetscFunctionReturn(0); 1954 } 1955 1956 #undef __FUNCT__ 1957 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1958 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1959 { 1960 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1961 MatScalar *aa=a->a,*v,*v1; 1962 PetscScalar *x,*b,*t,sum,d; 1963 PetscErrorCode ierr; 1964 PetscInt m=a->mbs,bs=A->bs,*ai=a->i,*aj=a->j; 1965 PetscInt nz,nz1,*vj,*vj1,i; 1966 1967 PetscFunctionBegin; 1968 its = its*lits; 1969 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1970 1971 if (bs > 1) 1972 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1973 1974 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1975 if (xx != bb) { 1976 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1977 } else { 1978 b = x; 1979 } 1980 1981 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1982 1983 if (flag & SOR_ZERO_INITIAL_GUESS) { 1984 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1985 for (i=0; i<m; i++) 1986 t[i] = b[i]; 1987 1988 for (i=0; i<m; i++){ 1989 d = *(aa + ai[i]); /* diag[i] */ 1990 v = aa + ai[i] + 1; 1991 vj = aj + ai[i] + 1; 1992 nz = ai[i+1] - ai[i] - 1; 1993 x[i] = omega*t[i]/d; 1994 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 1995 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 1996 } 1997 } 1998 1999 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2000 for (i=0; i<m; i++) 2001 t[i] = b[i]; 2002 2003 for (i=0; i<m-1; i++){ /* update rhs */ 2004 v = aa + ai[i] + 1; 2005 vj = aj + ai[i] + 1; 2006 nz = ai[i+1] - ai[i] - 1; 2007 while (nz--) t[*vj++] -= x[i]*(*v++); 2008 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2009 } 2010 for (i=m-1; i>=0; i--){ 2011 d = *(aa + ai[i]); 2012 v = aa + ai[i] + 1; 2013 vj = aj + ai[i] + 1; 2014 nz = ai[i+1] - ai[i] - 1; 2015 sum = t[i]; 2016 while (nz--) sum -= x[*vj++]*(*v++); 2017 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2018 x[i] = (1-omega)*x[i] + omega*sum/d; 2019 } 2020 } 2021 its--; 2022 } 2023 2024 while (its--) { 2025 /* 2026 forward sweep: 2027 for i=0,...,m-1: 2028 sum[i] = (b[i] - U(i,:)x )/d[i]; 2029 x[i] = (1-omega)x[i] + omega*sum[i]; 2030 b = b - x[i]*U^T(i,:); 2031 2032 */ 2033 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2034 for (i=0; i<m; i++) 2035 t[i] = b[i]; 2036 2037 for (i=0; i<m; i++){ 2038 d = *(aa + ai[i]); /* diag[i] */ 2039 v = aa + ai[i] + 1; v1=v; 2040 vj = aj + ai[i] + 1; vj1=vj; 2041 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2042 sum = t[i]; 2043 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2044 x[i] = (1-omega)*x[i] + omega*sum/d; 2045 while (nz--) t[*vj++] -= x[i]*(*v++); 2046 ierr = PetscLogFlops(4*nz-2);CHKERRQ(ierr); 2047 } 2048 } 2049 2050 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2051 /* 2052 backward sweep: 2053 b = b - x[i]*U^T(i,:), i=0,...,n-2 2054 for i=m-1,...,0: 2055 sum[i] = (b[i] - U(i,:)x )/d[i]; 2056 x[i] = (1-omega)x[i] + omega*sum[i]; 2057 */ 2058 for (i=0; i<m; i++) 2059 t[i] = b[i]; 2060 2061 for (i=0; i<m-1; i++){ /* update rhs */ 2062 v = aa + ai[i] + 1; 2063 vj = aj + ai[i] + 1; 2064 nz = ai[i+1] - ai[i] - 1; 2065 while (nz--) t[*vj++] -= x[i]*(*v++); 2066 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2067 } 2068 for (i=m-1; i>=0; i--){ 2069 d = *(aa + ai[i]); 2070 v = aa + ai[i] + 1; 2071 vj = aj + ai[i] + 1; 2072 nz = ai[i+1] - ai[i] - 1; 2073 sum = t[i]; 2074 while (nz--) sum -= x[*vj++]*(*v++); 2075 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2076 x[i] = (1-omega)*x[i] + omega*sum/d; 2077 } 2078 } 2079 } 2080 2081 ierr = PetscFree(t);CHKERRQ(ierr); 2082 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2083 if (bb != xx) { 2084 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2085 } 2086 PetscFunctionReturn(0); 2087 } 2088 2089 2090 2091 2092 2093 2094