1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the SBAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/inline/spops.h" 9 #include "src/mat/impls/sbaij/seq/sbaij.h" 10 11 #define CHUNKSIZE 10 12 13 /* 14 Checks for missing diagonals 15 */ 16 #undef __FUNCT__ 17 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 18 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A) 19 { 20 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 21 PetscErrorCode ierr; 22 PetscInt *diag,*jj = a->j,i; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 for (i=0; i<a->mbs; i++) { 28 if (jj[diag[i]] != i) SETERRQ1(PETSC_ERR_ARG_CORRUPT,"Matrix is missing diagonal number %D",i); 29 } 30 PetscFunctionReturn(0); 31 } 32 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 35 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A) 36 { 37 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 38 PetscErrorCode ierr; 39 PetscInt i,mbs = a->mbs; 40 41 PetscFunctionBegin; 42 if (a->diag) PetscFunctionReturn(0); 43 44 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 45 ierr = PetscLogObjectMemory(A,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 46 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 47 PetscFunctionReturn(0); 48 } 49 50 #undef __FUNCT__ 51 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 52 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 53 { 54 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 55 PetscInt n = a->mbs,i; 56 57 PetscFunctionBegin; 58 *nn = n; 59 if (!ia) PetscFunctionReturn(0); 60 61 if (oshift == 1) { 62 /* temporarily add 1 to i and j indices */ 63 PetscInt nz = a->i[n]; 64 for (i=0; i<nz; i++) a->j[i]++; 65 for (i=0; i<n+1; i++) a->i[i]++; 66 *ia = a->i; *ja = a->j; 67 } else { 68 *ia = a->i; *ja = a->j; 69 } 70 PetscFunctionReturn(0); 71 } 72 73 #undef __FUNCT__ 74 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 75 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 76 { 77 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 78 PetscInt i,n = a->mbs; 79 80 PetscFunctionBegin; 81 if (!ia) PetscFunctionReturn(0); 82 83 if (oshift == 1) { 84 PetscInt nz = a->i[n]-1; 85 for (i=0; i<nz; i++) a->j[i]--; 86 for (i=0; i<n+1; i++) a->i[i]--; 87 } 88 PetscFunctionReturn(0); 89 } 90 91 #undef __FUNCT__ 92 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 93 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A) 94 { 95 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 96 PetscErrorCode ierr; 97 98 PetscFunctionBegin; 99 PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->m,a->nz); 100 ierr = PetscFree(a->a);CHKERRQ(ierr); 101 if (!a->singlemalloc) { 102 ierr = PetscFree(a->i);CHKERRQ(ierr); 103 ierr = PetscFree(a->j);CHKERRQ(ierr); 104 } 105 if (a->row) { 106 ierr = ISDestroy(a->row);CHKERRQ(ierr); 107 } 108 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 109 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 110 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 111 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 112 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 113 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 114 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 115 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 116 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 117 118 if (a->inew){ 119 ierr = PetscFree(a->inew);CHKERRQ(ierr); 120 a->inew = 0; 121 } 122 ierr = PetscFree(a);CHKERRQ(ierr); 123 124 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 125 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 126 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 127 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 128 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr); 129 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 130 PetscFunctionReturn(0); 131 } 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 135 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op) 136 { 137 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 138 PetscErrorCode ierr; 139 140 PetscFunctionBegin; 141 switch (op) { 142 case MAT_ROW_ORIENTED: 143 a->roworiented = PETSC_TRUE; 144 break; 145 case MAT_COLUMN_ORIENTED: 146 a->roworiented = PETSC_FALSE; 147 break; 148 case MAT_COLUMNS_SORTED: 149 a->sorted = PETSC_TRUE; 150 break; 151 case MAT_COLUMNS_UNSORTED: 152 a->sorted = PETSC_FALSE; 153 break; 154 case MAT_KEEP_ZEROED_ROWS: 155 a->keepzeroedrows = PETSC_TRUE; 156 break; 157 case MAT_NO_NEW_NONZERO_LOCATIONS: 158 a->nonew = 1; 159 break; 160 case MAT_NEW_NONZERO_LOCATION_ERR: 161 a->nonew = -1; 162 break; 163 case MAT_NEW_NONZERO_ALLOCATION_ERR: 164 a->nonew = -2; 165 break; 166 case MAT_YES_NEW_NONZERO_LOCATIONS: 167 a->nonew = 0; 168 break; 169 case MAT_ROWS_SORTED: 170 case MAT_ROWS_UNSORTED: 171 case MAT_YES_NEW_DIAGONALS: 172 case MAT_IGNORE_OFF_PROC_ENTRIES: 173 case MAT_USE_HASH_TABLE: 174 ierr = PetscLogInfo((A,"MatSetOption_SeqSBAIJ:Option ignored\n"));CHKERRQ(ierr); 175 break; 176 case MAT_NO_NEW_DIAGONALS: 177 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 178 case MAT_NOT_SYMMETRIC: 179 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 180 case MAT_HERMITIAN: 181 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 182 case MAT_SYMMETRIC: 183 case MAT_STRUCTURALLY_SYMMETRIC: 184 case MAT_NOT_HERMITIAN: 185 case MAT_SYMMETRY_ETERNAL: 186 case MAT_NOT_SYMMETRY_ETERNAL: 187 case MAT_IGNORE_LOWER_TRIANGULAR: 188 a->ignore_ltriangular = PETSC_TRUE; 189 break; 190 case MAT_ERROR_LOWER_TRIANGULAR: 191 a->ignore_ltriangular = PETSC_FALSE; 192 break; 193 default: 194 SETERRQ(PETSC_ERR_SUP,"unknown option"); 195 } 196 PetscFunctionReturn(0); 197 } 198 199 #undef __FUNCT__ 200 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 201 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v) 202 { 203 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 204 PetscErrorCode ierr; 205 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2; 206 MatScalar *aa,*aa_i; 207 PetscScalar *v_i; 208 209 PetscFunctionBegin; 210 bs = A->bs; 211 ai = a->i; 212 aj = a->j; 213 aa = a->a; 214 bs2 = a->bs2; 215 216 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row); 217 218 bn = row/bs; /* Block number */ 219 bp = row % bs; /* Block position */ 220 M = ai[bn+1] - ai[bn]; 221 *ncols = bs*M; 222 223 if (v) { 224 *v = 0; 225 if (*ncols) { 226 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 227 for (i=0; i<M; i++) { /* for each block in the block row */ 228 v_i = *v + i*bs; 229 aa_i = aa + bs2*(ai[bn] + i); 230 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 231 } 232 } 233 } 234 235 if (cols) { 236 *cols = 0; 237 if (*ncols) { 238 ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr); 239 for (i=0; i<M; i++) { /* for each block in the block row */ 240 cols_i = *cols + i*bs; 241 itmp = bs*aj[ai[bn] + i]; 242 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 243 } 244 } 245 } 246 247 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 248 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 249 #ifdef column_search 250 v_i = *v + M*bs; 251 cols_i = *cols + M*bs; 252 for (i=0; i<bn; i++){ /* for each block row */ 253 M = ai[i+1] - ai[i]; 254 for (j=0; j<M; j++){ 255 itmp = aj[ai[i] + j]; /* block column value */ 256 if (itmp == bn){ 257 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 258 for (k=0; k<bs; k++) { 259 *cols_i++ = i*bs+k; 260 *v_i++ = aa_i[k]; 261 } 262 *ncols += bs; 263 break; 264 } 265 } 266 } 267 #endif 268 PetscFunctionReturn(0); 269 } 270 271 #undef __FUNCT__ 272 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 273 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 274 { 275 PetscErrorCode ierr; 276 277 PetscFunctionBegin; 278 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 279 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 280 PetscFunctionReturn(0); 281 } 282 283 #undef __FUNCT__ 284 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 285 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B) 286 { 287 PetscErrorCode ierr; 288 PetscFunctionBegin; 289 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 290 PetscFunctionReturn(0); 291 } 292 293 #undef __FUNCT__ 294 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 295 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 296 { 297 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 298 PetscErrorCode ierr; 299 PetscInt i,j,bs = A->bs,k,l,bs2=a->bs2; 300 char *name; 301 PetscViewerFormat format; 302 303 PetscFunctionBegin; 304 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 305 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 306 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 307 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 308 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 309 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 310 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 311 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 312 for (i=0; i<a->mbs; i++) { 313 for (j=0; j<bs; j++) { 314 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 315 for (k=a->i[i]; k<a->i[i+1]; k++) { 316 for (l=0; l<bs; l++) { 317 #if defined(PETSC_USE_COMPLEX) 318 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 319 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 320 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 321 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 322 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 323 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 324 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 325 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 326 } 327 #else 328 if (a->a[bs2*k + l*bs + j] != 0.0) { 329 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 330 } 331 #endif 332 } 333 } 334 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 335 } 336 } 337 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 338 } else { 339 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 340 for (i=0; i<a->mbs; i++) { 341 for (j=0; j<bs; j++) { 342 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 343 for (k=a->i[i]; k<a->i[i+1]; k++) { 344 for (l=0; l<bs; l++) { 345 #if defined(PETSC_USE_COMPLEX) 346 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 347 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 348 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 349 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 350 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 351 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 352 } else { 353 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 354 } 355 #else 356 ierr = PetscViewerASCIIPrintf(viewer," %D %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 357 #endif 358 } 359 } 360 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 361 } 362 } 363 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 364 } 365 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 366 PetscFunctionReturn(0); 367 } 368 369 #undef __FUNCT__ 370 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 371 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 372 { 373 Mat A = (Mat) Aa; 374 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 375 PetscErrorCode ierr; 376 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2; 377 PetscMPIInt rank; 378 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 379 MatScalar *aa; 380 MPI_Comm comm; 381 PetscViewer viewer; 382 383 PetscFunctionBegin; 384 /* 385 This is nasty. If this is called from an originally parallel matrix 386 then all processes call this,but only the first has the matrix so the 387 rest should return immediately. 388 */ 389 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 390 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 391 if (rank) PetscFunctionReturn(0); 392 393 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 394 395 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 396 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 397 398 /* loop over matrix elements drawing boxes */ 399 color = PETSC_DRAW_BLUE; 400 for (i=0,row=0; i<mbs; i++,row+=bs) { 401 for (j=a->i[i]; j<a->i[i+1]; j++) { 402 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 403 x_l = a->j[j]*bs; x_r = x_l + 1.0; 404 aa = a->a + j*bs2; 405 for (k=0; k<bs; k++) { 406 for (l=0; l<bs; l++) { 407 if (PetscRealPart(*aa++) >= 0.) continue; 408 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 409 } 410 } 411 } 412 } 413 color = PETSC_DRAW_CYAN; 414 for (i=0,row=0; i<mbs; i++,row+=bs) { 415 for (j=a->i[i]; j<a->i[i+1]; j++) { 416 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 417 x_l = a->j[j]*bs; x_r = x_l + 1.0; 418 aa = a->a + j*bs2; 419 for (k=0; k<bs; k++) { 420 for (l=0; l<bs; l++) { 421 if (PetscRealPart(*aa++) != 0.) continue; 422 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 423 } 424 } 425 } 426 } 427 428 color = PETSC_DRAW_RED; 429 for (i=0,row=0; i<mbs; i++,row+=bs) { 430 for (j=a->i[i]; j<a->i[i+1]; j++) { 431 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 432 x_l = a->j[j]*bs; x_r = x_l + 1.0; 433 aa = a->a + j*bs2; 434 for (k=0; k<bs; k++) { 435 for (l=0; l<bs; l++) { 436 if (PetscRealPart(*aa++) <= 0.) continue; 437 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 438 } 439 } 440 } 441 } 442 PetscFunctionReturn(0); 443 } 444 445 #undef __FUNCT__ 446 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 447 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 448 { 449 PetscErrorCode ierr; 450 PetscReal xl,yl,xr,yr,w,h; 451 PetscDraw draw; 452 PetscTruth isnull; 453 454 PetscFunctionBegin; 455 456 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 457 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 458 459 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 460 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 461 xr += w; yr += h; xl = -w; yl = -h; 462 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 463 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 464 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 465 PetscFunctionReturn(0); 466 } 467 468 #undef __FUNCT__ 469 #define __FUNCT__ "MatView_SeqSBAIJ" 470 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 471 { 472 PetscErrorCode ierr; 473 PetscTruth iascii,isdraw; 474 475 PetscFunctionBegin; 476 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 477 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 478 if (iascii){ 479 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 480 } else if (isdraw) { 481 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 482 } else { 483 Mat B; 484 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 485 ierr = MatView(B,viewer);CHKERRQ(ierr); 486 ierr = MatDestroy(B);CHKERRQ(ierr); 487 } 488 PetscFunctionReturn(0); 489 } 490 491 492 #undef __FUNCT__ 493 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 494 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 495 { 496 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 497 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 498 PetscInt *ai = a->i,*ailen = a->ilen; 499 PetscInt brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2; 500 MatScalar *ap,*aa = a->a,zero = 0.0; 501 502 PetscFunctionBegin; 503 for (k=0; k<m; k++) { /* loop over rows */ 504 row = im[k]; brow = row/bs; 505 if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row); 506 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 507 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 508 nrow = ailen[brow]; 509 for (l=0; l<n; l++) { /* loop over columns */ 510 if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]); 511 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1); 512 col = in[l] ; 513 bcol = col/bs; 514 cidx = col%bs; 515 ridx = row%bs; 516 high = nrow; 517 low = 0; /* assume unsorted */ 518 while (high-low > 5) { 519 t = (low+high)/2; 520 if (rp[t] > bcol) high = t; 521 else low = t; 522 } 523 for (i=low; i<high; i++) { 524 if (rp[i] > bcol) break; 525 if (rp[i] == bcol) { 526 *v++ = ap[bs2*i+bs*cidx+ridx]; 527 goto finished; 528 } 529 } 530 *v++ = zero; 531 finished:; 532 } 533 } 534 PetscFunctionReturn(0); 535 } 536 537 538 #undef __FUNCT__ 539 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 540 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 541 { 542 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 543 PetscErrorCode ierr; 544 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 545 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 546 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval; 547 PetscTruth roworiented=a->roworiented; 548 const MatScalar *value = v; 549 MatScalar *ap,*aa = a->a,*bap; 550 551 PetscFunctionBegin; 552 if (roworiented) { 553 stepval = (n-1)*bs; 554 } else { 555 stepval = (m-1)*bs; 556 } 557 for (k=0; k<m; k++) { /* loop over added rows */ 558 row = im[k]; 559 if (row < 0) continue; 560 #if defined(PETSC_USE_DEBUG) 561 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 562 #endif 563 rp = aj + ai[row]; 564 ap = aa + bs2*ai[row]; 565 rmax = imax[row]; 566 nrow = ailen[row]; 567 low = 0; 568 high = nrow; 569 for (l=0; l<n; l++) { /* loop over added columns */ 570 if (in[l] < 0) continue; 571 col = in[l]; 572 #if defined(PETSC_USE_DEBUG) 573 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1); 574 #endif 575 if (col < row) continue; /* ignore lower triangular block */ 576 if (roworiented) { 577 value = v + k*(stepval+bs)*bs + l*bs; 578 } else { 579 value = v + l*(stepval+bs)*bs + k*bs; 580 } 581 if (col < lastcol) low = 0; else high = nrow; 582 lastcol = col; 583 while (high-low > 7) { 584 t = (low+high)/2; 585 if (rp[t] > col) high = t; 586 else low = t; 587 } 588 for (i=low; i<high; i++) { 589 if (rp[i] > col) break; 590 if (rp[i] == col) { 591 bap = ap + bs2*i; 592 if (roworiented) { 593 if (is == ADD_VALUES) { 594 for (ii=0; ii<bs; ii++,value+=stepval) { 595 for (jj=ii; jj<bs2; jj+=bs) { 596 bap[jj] += *value++; 597 } 598 } 599 } else { 600 for (ii=0; ii<bs; ii++,value+=stepval) { 601 for (jj=ii; jj<bs2; jj+=bs) { 602 bap[jj] = *value++; 603 } 604 } 605 } 606 } else { 607 if (is == ADD_VALUES) { 608 for (ii=0; ii<bs; ii++,value+=stepval) { 609 for (jj=0; jj<bs; jj++) { 610 *bap++ += *value++; 611 } 612 } 613 } else { 614 for (ii=0; ii<bs; ii++,value+=stepval) { 615 for (jj=0; jj<bs; jj++) { 616 *bap++ = *value++; 617 } 618 } 619 } 620 } 621 goto noinsert2; 622 } 623 } 624 if (nonew == 1) goto noinsert2; 625 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 626 if (nrow >= rmax) { 627 /* there is no extra room in row, therefore enlarge */ 628 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 629 MatScalar *new_a; 630 631 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 632 633 /* malloc new storage space */ 634 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); 635 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 636 new_j = (PetscInt*)(new_a + bs2*new_nz); 637 new_i = new_j + new_nz; 638 639 /* copy over old data into new slots */ 640 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 641 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 642 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 643 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 644 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 645 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 646 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 647 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 648 /* free up old matrix storage */ 649 ierr = PetscFree(a->a);CHKERRQ(ierr); 650 if (!a->singlemalloc) { 651 ierr = PetscFree(a->i);CHKERRQ(ierr); 652 ierr = PetscFree(a->j);CHKERRQ(ierr); 653 } 654 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 655 a->singlemalloc = PETSC_TRUE; 656 657 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 658 rmax = imax[row] = imax[row] + CHUNKSIZE; 659 ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); 660 a->maxnz += bs2*CHUNKSIZE; 661 a->reallocs++; 662 a->nz++; 663 } 664 N = nrow++ - 1; 665 /* shift up all the later entries in this row */ 666 for (ii=N; ii>=i; ii--) { 667 rp[ii+1] = rp[ii]; 668 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 669 } 670 if (N >= i) { 671 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 672 } 673 rp[i] = col; 674 bap = ap + bs2*i; 675 if (roworiented) { 676 for (ii=0; ii<bs; ii++,value+=stepval) { 677 for (jj=ii; jj<bs2; jj+=bs) { 678 bap[jj] = *value++; 679 } 680 } 681 } else { 682 for (ii=0; ii<bs; ii++,value+=stepval) { 683 for (jj=0; jj<bs; jj++) { 684 *bap++ = *value++; 685 } 686 } 687 } 688 noinsert2:; 689 low = i; 690 } 691 ailen[row] = nrow; 692 } 693 PetscFunctionReturn(0); 694 } 695 696 #undef __FUNCT__ 697 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 698 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 699 { 700 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 701 PetscErrorCode ierr; 702 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 703 PetscInt m = A->m,*ip,N,*ailen = a->ilen; 704 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 705 MatScalar *aa = a->a,*ap; 706 707 PetscFunctionBegin; 708 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 709 710 if (m) rmax = ailen[0]; 711 for (i=1; i<mbs; i++) { 712 /* move each row back by the amount of empty slots (fshift) before it*/ 713 fshift += imax[i-1] - ailen[i-1]; 714 rmax = PetscMax(rmax,ailen[i]); 715 if (fshift) { 716 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 717 N = ailen[i]; 718 for (j=0; j<N; j++) { 719 ip[j-fshift] = ip[j]; 720 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 721 } 722 } 723 ai[i] = ai[i-1] + ailen[i-1]; 724 } 725 if (mbs) { 726 fshift += imax[mbs-1] - ailen[mbs-1]; 727 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 728 } 729 /* reset ilen and imax for each row */ 730 for (i=0; i<mbs; i++) { 731 ailen[i] = imax[i] = ai[i+1] - ai[i]; 732 } 733 a->nz = ai[mbs]; 734 735 /* diagonals may have moved, reset it */ 736 if (a->diag) { 737 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 738 } 739 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n", 740 m,A->m,A->bs,fshift*bs2,a->nz*bs2));CHKERRQ(ierr); 741 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs));CHKERRQ(ierr); 742 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %D\n",rmax));CHKERRQ(ierr); 743 a->reallocs = 0; 744 A->info.nz_unneeded = (PetscReal)fshift*bs2; 745 PetscFunctionReturn(0); 746 } 747 748 /* 749 This function returns an array of flags which indicate the locations of contiguous 750 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 751 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 752 Assume: sizes should be long enough to hold all the values. 753 */ 754 #undef __FUNCT__ 755 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 756 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 757 { 758 PetscInt i,j,k,row; 759 PetscTruth flg; 760 761 PetscFunctionBegin; 762 for (i=0,j=0; i<n; j++) { 763 row = idx[i]; 764 if (row%bs!=0) { /* Not the begining of a block */ 765 sizes[j] = 1; 766 i++; 767 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 768 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 769 i++; 770 } else { /* Begining of the block, so check if the complete block exists */ 771 flg = PETSC_TRUE; 772 for (k=1; k<bs; k++) { 773 if (row+k != idx[i+k]) { /* break in the block */ 774 flg = PETSC_FALSE; 775 break; 776 } 777 } 778 if (flg) { /* No break in the bs */ 779 sizes[j] = bs; 780 i+= bs; 781 } else { 782 sizes[j] = 1; 783 i++; 784 } 785 } 786 } 787 *bs_max = j; 788 PetscFunctionReturn(0); 789 } 790 791 792 /* Only add/insert a(i,j) with i<=j (blocks). 793 Any a(i,j) with i>j input by user is ingored. 794 */ 795 796 #undef __FUNCT__ 797 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 798 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 799 { 800 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 801 PetscErrorCode ierr; 802 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 803 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 804 PetscInt *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol; 805 PetscInt ridx,cidx,bs2=a->bs2; 806 MatScalar *ap,value,*aa=a->a,*bap; 807 808 PetscFunctionBegin; 809 for (k=0; k<m; k++) { /* loop over added rows */ 810 row = im[k]; /* row number */ 811 brow = row/bs; /* block row number */ 812 if (row < 0) continue; 813 #if defined(PETSC_USE_DEBUG) 814 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 815 #endif 816 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 817 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 818 rmax = imax[brow]; /* maximum space allocated for this row */ 819 nrow = ailen[brow]; /* actual length of this row */ 820 low = 0; 821 822 for (l=0; l<n; l++) { /* loop over added columns */ 823 if (in[l] < 0) continue; 824 #if defined(PETSC_USE_DEBUG) 825 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->m-1); 826 #endif 827 col = in[l]; 828 bcol = col/bs; /* block col number */ 829 830 if (brow > bcol) { 831 if (a->ignore_ltriangular){ 832 continue; /* ignore lower triangular values */ 833 } else { 834 SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR)"); 835 } 836 } 837 838 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 839 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 840 /* element value a(k,l) */ 841 if (roworiented) { 842 value = v[l + k*n]; 843 } else { 844 value = v[k + l*m]; 845 } 846 847 /* move pointer bap to a(k,l) quickly and add/insert value */ 848 if (col < lastcol) low = 0; high = nrow; 849 lastcol = col; 850 while (high-low > 7) { 851 t = (low+high)/2; 852 if (rp[t] > bcol) high = t; 853 else low = t; 854 } 855 for (i=low; i<high; i++) { 856 if (rp[i] > bcol) break; 857 if (rp[i] == bcol) { 858 bap = ap + bs2*i + bs*cidx + ridx; 859 if (is == ADD_VALUES) *bap += value; 860 else *bap = value; 861 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 862 if (brow == bcol && ridx < cidx){ 863 bap = ap + bs2*i + bs*ridx + cidx; 864 if (is == ADD_VALUES) *bap += value; 865 else *bap = value; 866 } 867 goto noinsert1; 868 } 869 } 870 871 if (nonew == 1) goto noinsert1; 872 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 873 if (nrow >= rmax) { 874 /* there is no extra room in row, therefore enlarge */ 875 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 876 MatScalar *new_a; 877 878 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 879 880 /* Malloc new storage space */ 881 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); 882 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 883 new_j = (PetscInt*)(new_a + bs2*new_nz); 884 new_i = new_j + new_nz; 885 886 /* copy over old data into new slots */ 887 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 888 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 889 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 890 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 891 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 892 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 893 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 894 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 895 /* free up old matrix storage */ 896 ierr = PetscFree(a->a);CHKERRQ(ierr); 897 if (!a->singlemalloc) { 898 ierr = PetscFree(a->i);CHKERRQ(ierr); 899 ierr = PetscFree(a->j);CHKERRQ(ierr); 900 } 901 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 902 a->singlemalloc = PETSC_TRUE; 903 904 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 905 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 906 ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); 907 a->maxnz += bs2*CHUNKSIZE; 908 a->reallocs++; 909 a->nz++; 910 } 911 912 N = nrow++ - 1; 913 /* shift up all the later entries in this row */ 914 for (ii=N; ii>=i; ii--) { 915 rp[ii+1] = rp[ii]; 916 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 917 } 918 if (N>=i) { 919 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 920 } 921 rp[i] = bcol; 922 ap[bs2*i + bs*cidx + ridx] = value; 923 noinsert1:; 924 low = i; 925 } 926 } /* end of loop over added columns */ 927 ailen[brow] = nrow; 928 } /* end of loop over added rows */ 929 PetscFunctionReturn(0); 930 } 931 932 EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*); 933 EXTERN PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*); 934 EXTERN PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat,PetscInt,IS[],PetscInt); 935 EXTERN PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,PetscInt,MatReuse,Mat*); 936 EXTERN PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat*[]); 937 EXTERN PetscErrorCode MatScale_SeqSBAIJ(const PetscScalar*,Mat); 938 EXTERN PetscErrorCode MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 939 EXTERN PetscErrorCode MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 940 EXTERN PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat,Vec); 941 EXTERN PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 942 EXTERN PetscErrorCode MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 943 EXTERN PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat); 944 EXTERN PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat,Vec); 945 946 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 947 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 948 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 949 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 950 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 951 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 952 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 953 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 954 955 EXTERN PetscErrorCode MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs); 956 957 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 958 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 959 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 960 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 961 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 962 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 963 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 964 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 965 966 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,MatFactorInfo*,Mat*); 967 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,MatFactorInfo*,Mat*); 968 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,MatFactorInfo*,Mat*); 969 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,MatFactorInfo*,Mat*); 970 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,MatFactorInfo*,Mat*); 971 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,MatFactorInfo*,Mat*); 972 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,MatFactorInfo*,Mat*); 973 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,MatFactorInfo*,Mat*); 974 EXTERN PetscErrorCode MatGetInertia_SeqSBAIJ(Mat,PetscInt*,PetscInt*,PetscInt*); 975 976 EXTERN PetscErrorCode MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 977 EXTERN PetscErrorCode MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 978 EXTERN PetscErrorCode MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 979 EXTERN PetscErrorCode MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 980 EXTERN PetscErrorCode MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 981 EXTERN PetscErrorCode MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 982 EXTERN PetscErrorCode MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 983 EXTERN PetscErrorCode MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 984 985 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 986 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 987 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 988 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 989 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 990 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 991 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 992 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 993 994 #ifdef HAVE_MatICCFactor 995 /* modified from MatILUFactor_SeqSBAIJ, needs further work! */ 996 #undef __FUNCT__ 997 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 998 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 999 { 1000 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 1001 Mat outA; 1002 PetscErrorCode ierr; 1003 PetscTruth row_identity,col_identity; 1004 1005 PetscFunctionBegin; 1006 outA = inA; 1007 inA->factor = FACTOR_CHOLESKY; 1008 1009 if (!a->diag) { 1010 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 1011 } 1012 /* 1013 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1014 for ILU(0) factorization with natural ordering 1015 */ 1016 switch (a->bs) { 1017 case 1: 1018 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1019 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1020 inA->ops->solves = MatSolves_SeqSBAIJ_1; 1021 ierr = PetscLoginfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"));CHKERRQ(ierr); 1022 case 2: 1023 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1024 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1025 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1026 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"));CHKERRQ(ierr); 1027 break; 1028 case 3: 1029 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1030 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1031 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1032 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"));CHKERRQ(ierr); 1033 break; 1034 case 4: 1035 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1036 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1037 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1038 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"));CHKERRQ(ierr); 1039 break; 1040 case 5: 1041 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1042 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1043 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1044 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"));CHKERRQ(ierr); 1045 break; 1046 case 6: 1047 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1048 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1049 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1050 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"));CHKERRQ(ierr); 1051 break; 1052 case 7: 1053 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1054 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1055 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1056 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"));CHKERRQ(ierr); 1057 break; 1058 default: 1059 a->row = row; 1060 a->icol = col; 1061 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1062 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1063 1064 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1065 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1066 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 1067 1068 if (!a->solve_work) { 1069 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1070 ierr = PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1071 } 1072 } 1073 1074 ierr = MatCholeskyFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1075 PetscFunctionReturn(0); 1076 } 1077 #endif 1078 1079 #undef __FUNCT__ 1080 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1081 PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat A) 1082 { 1083 static PetscTruth called = PETSC_FALSE; 1084 MPI_Comm comm = A->comm; 1085 PetscErrorCode ierr; 1086 1087 PetscFunctionBegin; 1088 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1089 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1090 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1091 ierr = (*PetscHelpPrintf)(comm," -mat_ignore_ltriangular: Ignore lower triangular values set by user\n");CHKERRQ(ierr); 1092 PetscFunctionReturn(0); 1093 } 1094 1095 EXTERN_C_BEGIN 1096 #undef __FUNCT__ 1097 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1098 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices) 1099 { 1100 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1101 PetscInt i,nz,n; 1102 1103 PetscFunctionBegin; 1104 nz = baij->maxnz; 1105 n = mat->n; 1106 for (i=0; i<nz; i++) { 1107 baij->j[i] = indices[i]; 1108 } 1109 baij->nz = nz; 1110 for (i=0; i<n; i++) { 1111 baij->ilen[i] = baij->imax[i]; 1112 } 1113 PetscFunctionReturn(0); 1114 } 1115 EXTERN_C_END 1116 1117 #undef __FUNCT__ 1118 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1119 /*@ 1120 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1121 in the matrix. 1122 1123 Input Parameters: 1124 + mat - the SeqSBAIJ matrix 1125 - indices - the column indices 1126 1127 Level: advanced 1128 1129 Notes: 1130 This can be called if you have precomputed the nonzero structure of the 1131 matrix and want to provide it to the matrix object to improve the performance 1132 of the MatSetValues() operation. 1133 1134 You MUST have set the correct numbers of nonzeros per row in the call to 1135 MatCreateSeqSBAIJ(), and the columns indices MUST be sorted. 1136 1137 MUST be called before any calls to MatSetValues() 1138 1139 .seealso: MatCreateSeqSBAIJ 1140 @*/ 1141 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1142 { 1143 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1144 1145 PetscFunctionBegin; 1146 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1147 PetscValidPointer(indices,2); 1148 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1149 if (f) { 1150 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1151 } else { 1152 SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices"); 1153 } 1154 PetscFunctionReturn(0); 1155 } 1156 1157 #undef __FUNCT__ 1158 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1159 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A) 1160 { 1161 PetscErrorCode ierr; 1162 1163 PetscFunctionBegin; 1164 ierr = MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1165 PetscFunctionReturn(0); 1166 } 1167 1168 #undef __FUNCT__ 1169 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1170 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1171 { 1172 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1173 PetscFunctionBegin; 1174 *array = a->a; 1175 PetscFunctionReturn(0); 1176 } 1177 1178 #undef __FUNCT__ 1179 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1180 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1181 { 1182 PetscFunctionBegin; 1183 PetscFunctionReturn(0); 1184 } 1185 1186 #include "petscblaslapack.h" 1187 #undef __FUNCT__ 1188 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1189 PetscErrorCode MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1190 { 1191 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1192 PetscErrorCode ierr; 1193 PetscInt i,bs=Y->bs,bs2,j; 1194 PetscBLASInt bnz = (PetscBLASInt)x->nz,one = 1; 1195 1196 PetscFunctionBegin; 1197 if (str == SAME_NONZERO_PATTERN) { 1198 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1199 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1200 if (y->xtoy && y->XtoY != X) { 1201 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1202 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1203 } 1204 if (!y->xtoy) { /* get xtoy */ 1205 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1206 y->XtoY = X; 1207 } 1208 bs2 = bs*bs; 1209 for (i=0; i<x->nz; i++) { 1210 j = 0; 1211 while (j < bs2){ 1212 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1213 j++; 1214 } 1215 } 1216 ierr = PetscLogInfo((0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr); 1217 } else { 1218 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1219 } 1220 PetscFunctionReturn(0); 1221 } 1222 1223 #undef __FUNCT__ 1224 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1225 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1226 { 1227 PetscFunctionBegin; 1228 *flg = PETSC_TRUE; 1229 PetscFunctionReturn(0); 1230 } 1231 1232 #undef __FUNCT__ 1233 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1234 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1235 { 1236 PetscFunctionBegin; 1237 *flg = PETSC_TRUE; 1238 PetscFunctionReturn(0); 1239 } 1240 1241 #undef __FUNCT__ 1242 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1243 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg) 1244 { 1245 PetscFunctionBegin; 1246 *flg = PETSC_FALSE; 1247 PetscFunctionReturn(0); 1248 } 1249 1250 /* -------------------------------------------------------------------*/ 1251 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1252 MatGetRow_SeqSBAIJ, 1253 MatRestoreRow_SeqSBAIJ, 1254 MatMult_SeqSBAIJ_N, 1255 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1256 MatMult_SeqSBAIJ_N, 1257 MatMultAdd_SeqSBAIJ_N, 1258 MatSolve_SeqSBAIJ_N, 1259 0, 1260 0, 1261 /*10*/ 0, 1262 0, 1263 MatCholeskyFactor_SeqSBAIJ, 1264 MatRelax_SeqSBAIJ, 1265 MatTranspose_SeqSBAIJ, 1266 /*15*/ MatGetInfo_SeqSBAIJ, 1267 MatEqual_SeqSBAIJ, 1268 MatGetDiagonal_SeqSBAIJ, 1269 MatDiagonalScale_SeqSBAIJ, 1270 MatNorm_SeqSBAIJ, 1271 /*20*/ 0, 1272 MatAssemblyEnd_SeqSBAIJ, 1273 0, 1274 MatSetOption_SeqSBAIJ, 1275 MatZeroEntries_SeqSBAIJ, 1276 /*25*/ 0, 1277 0, 1278 0, 1279 MatCholeskyFactorSymbolic_SeqSBAIJ, 1280 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1281 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1282 0, 1283 MatICCFactorSymbolic_SeqSBAIJ, 1284 MatGetArray_SeqSBAIJ, 1285 MatRestoreArray_SeqSBAIJ, 1286 /*35*/ MatDuplicate_SeqSBAIJ, 1287 0, 1288 0, 1289 0, 1290 0, 1291 /*40*/ MatAXPY_SeqSBAIJ, 1292 MatGetSubMatrices_SeqSBAIJ, 1293 MatIncreaseOverlap_SeqSBAIJ, 1294 MatGetValues_SeqSBAIJ, 1295 0, 1296 /*45*/ MatPrintHelp_SeqSBAIJ, 1297 MatScale_SeqSBAIJ, 1298 0, 1299 0, 1300 0, 1301 /*50*/ 0, 1302 MatGetRowIJ_SeqSBAIJ, 1303 MatRestoreRowIJ_SeqSBAIJ, 1304 0, 1305 0, 1306 /*55*/ 0, 1307 0, 1308 0, 1309 0, 1310 MatSetValuesBlocked_SeqSBAIJ, 1311 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1312 0, 1313 0, 1314 MatGetPetscMaps_Petsc, 1315 0, 1316 /*65*/ 0, 1317 0, 1318 0, 1319 0, 1320 0, 1321 /*70*/ MatGetRowMax_SeqSBAIJ, 1322 0, 1323 0, 1324 0, 1325 0, 1326 /*75*/ 0, 1327 0, 1328 0, 1329 0, 1330 0, 1331 /*80*/ 0, 1332 0, 1333 0, 1334 #if !defined(PETSC_USE_COMPLEX) 1335 MatGetInertia_SeqSBAIJ, 1336 #else 1337 0, 1338 #endif 1339 MatLoad_SeqSBAIJ, 1340 /*85*/ MatIsSymmetric_SeqSBAIJ, 1341 MatIsHermitian_SeqSBAIJ, 1342 MatIsStructurallySymmetric_SeqSBAIJ, 1343 0, 1344 0, 1345 /*90*/ 0, 1346 0, 1347 0, 1348 0, 1349 0, 1350 /*95*/ 0, 1351 0, 1352 0, 1353 0}; 1354 1355 EXTERN_C_BEGIN 1356 #undef __FUNCT__ 1357 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1358 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqSBAIJ(Mat mat) 1359 { 1360 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1361 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 1362 PetscErrorCode ierr; 1363 1364 PetscFunctionBegin; 1365 if (aij->nonew != 1) { 1366 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1367 } 1368 1369 /* allocate space for values if not already there */ 1370 if (!aij->saved_values) { 1371 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1372 } 1373 1374 /* copy values over */ 1375 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1376 PetscFunctionReturn(0); 1377 } 1378 EXTERN_C_END 1379 1380 EXTERN_C_BEGIN 1381 #undef __FUNCT__ 1382 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1383 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqSBAIJ(Mat mat) 1384 { 1385 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1386 PetscErrorCode ierr; 1387 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 1388 1389 PetscFunctionBegin; 1390 if (aij->nonew != 1) { 1391 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1392 } 1393 if (!aij->saved_values) { 1394 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1395 } 1396 1397 /* copy values over */ 1398 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1399 PetscFunctionReturn(0); 1400 } 1401 EXTERN_C_END 1402 1403 EXTERN_C_BEGIN 1404 #undef __FUNCT__ 1405 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1406 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 1407 { 1408 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1409 PetscErrorCode ierr; 1410 PetscInt i,len,mbs,bs2; 1411 PetscTruth flg; 1412 1413 PetscFunctionBegin; 1414 B->preallocated = PETSC_TRUE; 1415 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1416 mbs = B->m/bs; 1417 bs2 = bs*bs; 1418 1419 if (mbs*bs != B->m) { 1420 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1421 } 1422 1423 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1424 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 1425 if (nnz) { 1426 for (i=0; i<mbs; i++) { 1427 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 1428 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs); 1429 } 1430 } 1431 1432 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1433 if (!flg) { 1434 switch (bs) { 1435 case 1: 1436 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1437 B->ops->solve = MatSolve_SeqSBAIJ_1; 1438 B->ops->solves = MatSolves_SeqSBAIJ_1; 1439 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1440 B->ops->mult = MatMult_SeqSBAIJ_1; 1441 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1442 break; 1443 case 2: 1444 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1445 B->ops->solve = MatSolve_SeqSBAIJ_2; 1446 B->ops->solvetranspose = MatSolve_SeqSBAIJ_2; 1447 B->ops->mult = MatMult_SeqSBAIJ_2; 1448 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1449 break; 1450 case 3: 1451 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1452 B->ops->solve = MatSolve_SeqSBAIJ_3; 1453 B->ops->solvetranspose = MatSolve_SeqSBAIJ_3; 1454 B->ops->mult = MatMult_SeqSBAIJ_3; 1455 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1456 break; 1457 case 4: 1458 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1459 B->ops->solve = MatSolve_SeqSBAIJ_4; 1460 B->ops->solvetranspose = MatSolve_SeqSBAIJ_4; 1461 B->ops->mult = MatMult_SeqSBAIJ_4; 1462 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1463 break; 1464 case 5: 1465 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1466 B->ops->solve = MatSolve_SeqSBAIJ_5; 1467 B->ops->solvetranspose = MatSolve_SeqSBAIJ_5; 1468 B->ops->mult = MatMult_SeqSBAIJ_5; 1469 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1470 break; 1471 case 6: 1472 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1473 B->ops->solve = MatSolve_SeqSBAIJ_6; 1474 B->ops->solvetranspose = MatSolve_SeqSBAIJ_6; 1475 B->ops->mult = MatMult_SeqSBAIJ_6; 1476 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1477 break; 1478 case 7: 1479 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1480 B->ops->solve = MatSolve_SeqSBAIJ_7; 1481 B->ops->solvetranspose = MatSolve_SeqSBAIJ_7; 1482 B->ops->mult = MatMult_SeqSBAIJ_7; 1483 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1484 break; 1485 } 1486 } 1487 1488 b->mbs = mbs; 1489 b->nbs = mbs; 1490 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->imax);CHKERRQ(ierr); 1491 if (!nnz) { 1492 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1493 else if (nz <= 0) nz = 1; 1494 for (i=0; i<mbs; i++) { 1495 b->imax[i] = nz; 1496 } 1497 nz = nz*mbs; /* total nz */ 1498 } else { 1499 nz = 0; 1500 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1501 } 1502 /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1503 1504 /* allocate the matrix space */ 1505 len = nz*sizeof(PetscInt) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(PetscInt); 1506 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1507 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1508 b->j = (PetscInt*)(b->a + nz*bs2); 1509 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1510 b->i = b->j + nz; 1511 b->singlemalloc = PETSC_TRUE; 1512 1513 /* pointer to beginning of each row */ 1514 b->i[0] = 0; 1515 for (i=1; i<mbs+1; i++) { 1516 b->i[i] = b->i[i-1] + b->imax[i-1]; 1517 } 1518 1519 /* b->ilen will count nonzeros in each block row so far. */ 1520 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->ilen);CHKERRQ(ierr); 1521 ierr = PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1522 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1523 1524 B->bs = bs; 1525 b->bs2 = bs2; 1526 b->nz = 0; 1527 b->maxnz = nz*bs2; 1528 1529 b->inew = 0; 1530 b->jnew = 0; 1531 b->anew = 0; 1532 b->a2anew = 0; 1533 b->permute = PETSC_FALSE; 1534 PetscFunctionReturn(0); 1535 } 1536 EXTERN_C_END 1537 1538 EXTERN_C_BEGIN 1539 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*); 1540 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqBAIJ(Mat,const MatType,MatReuse,Mat*); 1541 EXTERN_C_END 1542 1543 /*MC 1544 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1545 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1546 1547 Options Database Keys: 1548 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1549 1550 Level: beginner 1551 1552 .seealso: MatCreateSeqSBAIJ 1553 M*/ 1554 1555 EXTERN_C_BEGIN 1556 #undef __FUNCT__ 1557 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1558 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqSBAIJ(Mat B) 1559 { 1560 Mat_SeqSBAIJ *b; 1561 PetscErrorCode ierr; 1562 PetscMPIInt size; 1563 PetscTruth flg; 1564 1565 PetscFunctionBegin; 1566 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1567 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1568 B->m = B->M = PetscMax(B->m,B->M); 1569 B->n = B->N = PetscMax(B->n,B->N); 1570 1571 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1572 B->data = (void*)b; 1573 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1574 B->ops->destroy = MatDestroy_SeqSBAIJ; 1575 B->ops->view = MatView_SeqSBAIJ; 1576 B->factor = 0; 1577 B->mapping = 0; 1578 b->row = 0; 1579 b->icol = 0; 1580 b->reallocs = 0; 1581 b->saved_values = 0; 1582 1583 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1584 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1585 1586 b->sorted = PETSC_FALSE; 1587 b->roworiented = PETSC_TRUE; 1588 b->nonew = 0; 1589 b->diag = 0; 1590 b->solve_work = 0; 1591 b->mult_work = 0; 1592 B->spptr = 0; 1593 b->keepzeroedrows = PETSC_FALSE; 1594 b->xtoy = 0; 1595 b->XtoY = 0; 1596 1597 b->inew = 0; 1598 b->jnew = 0; 1599 b->anew = 0; 1600 b->a2anew = 0; 1601 b->permute = PETSC_FALSE; 1602 1603 b->ignore_ltriangular = PETSC_FALSE; 1604 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_ignore_lower_triangular",&flg);CHKERRQ(ierr); 1605 if (flg) b->ignore_ltriangular = PETSC_TRUE; 1606 1607 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1608 "MatStoreValues_SeqSBAIJ", 1609 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1610 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1611 "MatRetrieveValues_SeqSBAIJ", 1612 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1613 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1614 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1615 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1616 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C", 1617 "MatConvert_SeqSBAIJ_SeqAIJ", 1618 MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr); 1619 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C", 1620 "MatConvert_SeqSBAIJ_SeqBAIJ", 1621 MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr); 1622 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1623 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1624 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1625 1626 B->symmetric = PETSC_TRUE; 1627 B->structurally_symmetric = PETSC_TRUE; 1628 B->symmetric_set = PETSC_TRUE; 1629 B->structurally_symmetric_set = PETSC_TRUE; 1630 PetscFunctionReturn(0); 1631 } 1632 EXTERN_C_END 1633 1634 #undef __FUNCT__ 1635 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1636 /*@C 1637 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1638 compressed row) format. For good matrix assembly performance the 1639 user should preallocate the matrix storage by setting the parameter nz 1640 (or the array nnz). By setting these parameters accurately, performance 1641 during matrix assembly can be increased by more than a factor of 50. 1642 1643 Collective on Mat 1644 1645 Input Parameters: 1646 + A - the symmetric matrix 1647 . bs - size of block 1648 . nz - number of block nonzeros per block row (same for all rows) 1649 - nnz - array containing the number of block nonzeros in the upper triangular plus 1650 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1651 1652 Options Database Keys: 1653 . -mat_no_unroll - uses code that does not unroll the loops in the 1654 block calculations (much slower) 1655 . -mat_block_size - size of the blocks to use 1656 1657 Level: intermediate 1658 1659 Notes: 1660 Specify the preallocated storage with either nz or nnz (not both). 1661 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1662 allocation. For additional details, see the users manual chapter on 1663 matrices. 1664 1665 If the nnz parameter is given then the nz parameter is ignored 1666 1667 1668 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1669 @*/ 1670 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 1671 { 1672 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 1673 1674 PetscFunctionBegin; 1675 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1676 if (f) { 1677 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1678 } 1679 PetscFunctionReturn(0); 1680 } 1681 1682 #undef __FUNCT__ 1683 #define __FUNCT__ "MatCreateSeqSBAIJ" 1684 /*@C 1685 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1686 compressed row) format. For good matrix assembly performance the 1687 user should preallocate the matrix storage by setting the parameter nz 1688 (or the array nnz). By setting these parameters accurately, performance 1689 during matrix assembly can be increased by more than a factor of 50. 1690 1691 Collective on MPI_Comm 1692 1693 Input Parameters: 1694 + comm - MPI communicator, set to PETSC_COMM_SELF 1695 . bs - size of block 1696 . m - number of rows, or number of columns 1697 . nz - number of block nonzeros per block row (same for all rows) 1698 - nnz - array containing the number of block nonzeros in the upper triangular plus 1699 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1700 1701 Output Parameter: 1702 . A - the symmetric matrix 1703 1704 Options Database Keys: 1705 . -mat_no_unroll - uses code that does not unroll the loops in the 1706 block calculations (much slower) 1707 . -mat_block_size - size of the blocks to use 1708 1709 Level: intermediate 1710 1711 Notes: 1712 1713 Specify the preallocated storage with either nz or nnz (not both). 1714 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1715 allocation. For additional details, see the users manual chapter on 1716 matrices. 1717 1718 If the nnz parameter is given then the nz parameter is ignored 1719 1720 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1721 @*/ 1722 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1723 { 1724 PetscErrorCode ierr; 1725 1726 PetscFunctionBegin; 1727 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1728 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1729 ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1730 PetscFunctionReturn(0); 1731 } 1732 1733 #undef __FUNCT__ 1734 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1735 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1736 { 1737 Mat C; 1738 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1739 PetscErrorCode ierr; 1740 PetscInt i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1741 1742 PetscFunctionBegin; 1743 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1744 1745 *B = 0; 1746 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1747 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1748 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1749 c = (Mat_SeqSBAIJ*)C->data; 1750 1751 C->preallocated = PETSC_TRUE; 1752 C->factor = A->factor; 1753 c->row = 0; 1754 c->icol = 0; 1755 c->saved_values = 0; 1756 c->keepzeroedrows = a->keepzeroedrows; 1757 C->assembled = PETSC_TRUE; 1758 1759 C->M = A->M; 1760 C->N = A->N; 1761 C->bs = A->bs; 1762 c->bs2 = a->bs2; 1763 c->mbs = a->mbs; 1764 c->nbs = a->nbs; 1765 1766 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr); 1767 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr); 1768 for (i=0; i<mbs; i++) { 1769 c->imax[i] = a->imax[i]; 1770 c->ilen[i] = a->ilen[i]; 1771 } 1772 1773 /* allocate the matrix space */ 1774 c->singlemalloc = PETSC_TRUE; 1775 len = (mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)); 1776 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1777 c->j = (PetscInt*)(c->a + nz*bs2); 1778 c->i = c->j + nz; 1779 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1780 if (mbs > 0) { 1781 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1782 if (cpvalues == MAT_COPY_VALUES) { 1783 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1784 } else { 1785 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1786 } 1787 } 1788 1789 ierr = PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1790 c->sorted = a->sorted; 1791 c->roworiented = a->roworiented; 1792 c->nonew = a->nonew; 1793 1794 if (a->diag) { 1795 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 1796 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1797 for (i=0; i<mbs; i++) { 1798 c->diag[i] = a->diag[i]; 1799 } 1800 } else c->diag = 0; 1801 c->nz = a->nz; 1802 c->maxnz = a->maxnz; 1803 c->solve_work = 0; 1804 c->mult_work = 0; 1805 *B = C; 1806 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1807 PetscFunctionReturn(0); 1808 } 1809 1810 #undef __FUNCT__ 1811 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1812 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer,const MatType type,Mat *A) 1813 { 1814 Mat_SeqSBAIJ *a; 1815 Mat B; 1816 PetscErrorCode ierr; 1817 int fd; 1818 PetscMPIInt size; 1819 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 1820 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1821 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 1822 PetscInt *masked,nmask,tmp,bs2,ishift; 1823 PetscScalar *aa; 1824 MPI_Comm comm = ((PetscObject)viewer)->comm; 1825 1826 PetscFunctionBegin; 1827 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1828 bs2 = bs*bs; 1829 1830 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1831 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1832 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1833 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1834 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1835 M = header[1]; N = header[2]; nz = header[3]; 1836 1837 if (header[3] < 0) { 1838 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1839 } 1840 1841 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1842 1843 /* 1844 This code adds extra rows to make sure the number of rows is 1845 divisible by the blocksize 1846 */ 1847 mbs = M/bs; 1848 extra_rows = bs - M + bs*(mbs); 1849 if (extra_rows == bs) extra_rows = 0; 1850 else mbs++; 1851 if (extra_rows) { 1852 ierr = PetscLogInfo((0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr); 1853 } 1854 1855 /* read in row lengths */ 1856 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1857 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1858 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1859 1860 /* read in column indices */ 1861 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1862 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1863 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1864 1865 /* loop over row lengths determining block row lengths */ 1866 ierr = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr); 1867 ierr = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1868 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 1869 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1870 masked = mask + mbs; 1871 rowcount = 0; nzcount = 0; 1872 for (i=0; i<mbs; i++) { 1873 nmask = 0; 1874 for (j=0; j<bs; j++) { 1875 kmax = rowlengths[rowcount]; 1876 for (k=0; k<kmax; k++) { 1877 tmp = jj[nzcount++]/bs; /* block col. index */ 1878 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1879 } 1880 rowcount++; 1881 } 1882 s_browlengths[i] += nmask; 1883 1884 /* zero out the mask elements we set */ 1885 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1886 } 1887 1888 /* create our matrix */ 1889 ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr); 1890 ierr = MatSetType(B,type);CHKERRQ(ierr); 1891 ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr); 1892 a = (Mat_SeqSBAIJ*)B->data; 1893 1894 /* set matrix "i" values */ 1895 a->i[0] = 0; 1896 for (i=1; i<= mbs; i++) { 1897 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1898 a->ilen[i-1] = s_browlengths[i-1]; 1899 } 1900 a->nz = a->i[mbs]; 1901 1902 /* read in nonzero values */ 1903 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1904 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1905 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1906 1907 /* set "a" and "j" values into matrix */ 1908 nzcount = 0; jcount = 0; 1909 for (i=0; i<mbs; i++) { 1910 nzcountb = nzcount; 1911 nmask = 0; 1912 for (j=0; j<bs; j++) { 1913 kmax = rowlengths[i*bs+j]; 1914 for (k=0; k<kmax; k++) { 1915 tmp = jj[nzcount++]/bs; /* block col. index */ 1916 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1917 } 1918 } 1919 /* sort the masked values */ 1920 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1921 1922 /* set "j" values into matrix */ 1923 maskcount = 1; 1924 for (j=0; j<nmask; j++) { 1925 a->j[jcount++] = masked[j]; 1926 mask[masked[j]] = maskcount++; 1927 } 1928 1929 /* set "a" values into matrix */ 1930 ishift = bs2*a->i[i]; 1931 for (j=0; j<bs; j++) { 1932 kmax = rowlengths[i*bs+j]; 1933 for (k=0; k<kmax; k++) { 1934 tmp = jj[nzcountb]/bs ; /* block col. index */ 1935 if (tmp >= i){ 1936 block = mask[tmp] - 1; 1937 point = jj[nzcountb] - bs*tmp; 1938 idx = ishift + bs2*block + j + bs*point; 1939 a->a[idx] = aa[nzcountb]; 1940 } 1941 nzcountb++; 1942 } 1943 } 1944 /* zero out the mask elements we set */ 1945 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1946 } 1947 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1948 1949 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1950 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1951 ierr = PetscFree(aa);CHKERRQ(ierr); 1952 ierr = PetscFree(jj);CHKERRQ(ierr); 1953 ierr = PetscFree(mask);CHKERRQ(ierr); 1954 1955 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1956 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1957 ierr = MatView_Private(B);CHKERRQ(ierr); 1958 *A = B; 1959 PetscFunctionReturn(0); 1960 } 1961 1962 #undef __FUNCT__ 1963 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1964 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1965 { 1966 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1967 MatScalar *aa=a->a,*v,*v1; 1968 PetscScalar *x,*b,*t,sum,d; 1969 PetscErrorCode ierr; 1970 PetscInt m=a->mbs,bs=A->bs,*ai=a->i,*aj=a->j; 1971 PetscInt nz,nz1,*vj,*vj1,i; 1972 1973 PetscFunctionBegin; 1974 its = its*lits; 1975 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1976 1977 if (bs > 1) 1978 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1979 1980 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1981 if (xx != bb) { 1982 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1983 } else { 1984 b = x; 1985 } 1986 1987 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1988 1989 if (flag & SOR_ZERO_INITIAL_GUESS) { 1990 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1991 for (i=0; i<m; i++) 1992 t[i] = b[i]; 1993 1994 for (i=0; i<m; i++){ 1995 d = *(aa + ai[i]); /* diag[i] */ 1996 v = aa + ai[i] + 1; 1997 vj = aj + ai[i] + 1; 1998 nz = ai[i+1] - ai[i] - 1; 1999 x[i] = omega*t[i]/d; 2000 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 2001 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2002 } 2003 } 2004 2005 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2006 for (i=0; i<m; i++) 2007 t[i] = b[i]; 2008 2009 for (i=0; i<m-1; i++){ /* update rhs */ 2010 v = aa + ai[i] + 1; 2011 vj = aj + ai[i] + 1; 2012 nz = ai[i+1] - ai[i] - 1; 2013 while (nz--) t[*vj++] -= x[i]*(*v++); 2014 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2015 } 2016 for (i=m-1; i>=0; i--){ 2017 d = *(aa + ai[i]); 2018 v = aa + ai[i] + 1; 2019 vj = aj + ai[i] + 1; 2020 nz = ai[i+1] - ai[i] - 1; 2021 sum = t[i]; 2022 while (nz--) sum -= x[*vj++]*(*v++); 2023 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2024 x[i] = (1-omega)*x[i] + omega*sum/d; 2025 } 2026 } 2027 its--; 2028 } 2029 2030 while (its--) { 2031 /* 2032 forward sweep: 2033 for i=0,...,m-1: 2034 sum[i] = (b[i] - U(i,:)x )/d[i]; 2035 x[i] = (1-omega)x[i] + omega*sum[i]; 2036 b = b - x[i]*U^T(i,:); 2037 2038 */ 2039 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2040 for (i=0; i<m; i++) 2041 t[i] = b[i]; 2042 2043 for (i=0; i<m; i++){ 2044 d = *(aa + ai[i]); /* diag[i] */ 2045 v = aa + ai[i] + 1; v1=v; 2046 vj = aj + ai[i] + 1; vj1=vj; 2047 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2048 sum = t[i]; 2049 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2050 x[i] = (1-omega)*x[i] + omega*sum/d; 2051 while (nz--) t[*vj++] -= x[i]*(*v++); 2052 ierr = PetscLogFlops(4*nz-2);CHKERRQ(ierr); 2053 } 2054 } 2055 2056 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2057 /* 2058 backward sweep: 2059 b = b - x[i]*U^T(i,:), i=0,...,n-2 2060 for i=m-1,...,0: 2061 sum[i] = (b[i] - U(i,:)x )/d[i]; 2062 x[i] = (1-omega)x[i] + omega*sum[i]; 2063 */ 2064 for (i=0; i<m; i++) 2065 t[i] = b[i]; 2066 2067 for (i=0; i<m-1; i++){ /* update rhs */ 2068 v = aa + ai[i] + 1; 2069 vj = aj + ai[i] + 1; 2070 nz = ai[i+1] - ai[i] - 1; 2071 while (nz--) t[*vj++] -= x[i]*(*v++); 2072 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2073 } 2074 for (i=m-1; i>=0; i--){ 2075 d = *(aa + ai[i]); 2076 v = aa + ai[i] + 1; 2077 vj = aj + ai[i] + 1; 2078 nz = ai[i+1] - ai[i] - 1; 2079 sum = t[i]; 2080 while (nz--) sum -= x[*vj++]*(*v++); 2081 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2082 x[i] = (1-omega)*x[i] + omega*sum/d; 2083 } 2084 } 2085 } 2086 2087 ierr = PetscFree(t);CHKERRQ(ierr); 2088 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2089 if (bb != xx) { 2090 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2091 } 2092 PetscFunctionReturn(0); 2093 } 2094 2095 2096 2097 2098 2099 2100