1 2 /* 3 Defines the basic matrix operations for the SBAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 7 #include "src/inline/spops.h" 8 #include "src/mat/impls/sbaij/seq/sbaij.h" 9 10 #define CHUNKSIZE 10 11 12 /* 13 Checks for missing diagonals 14 */ 15 #undef __FUNCT__ 16 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 17 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A) 18 { 19 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 20 PetscErrorCode ierr; 21 PetscInt *diag,*jj = a->j,i; 22 23 PetscFunctionBegin; 24 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 25 diag = a->diag; 26 for (i=0; i<a->mbs; i++) { 27 if (jj[diag[i]] != i) SETERRQ1(PETSC_ERR_ARG_CORRUPT,"Matrix is missing diagonal number %D",i); 28 } 29 PetscFunctionReturn(0); 30 } 31 32 #undef __FUNCT__ 33 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 34 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A) 35 { 36 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 37 PetscErrorCode ierr; 38 PetscInt i,mbs = a->mbs; 39 40 PetscFunctionBegin; 41 if (a->diag) PetscFunctionReturn(0); 42 43 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 44 ierr = PetscLogObjectMemory(A,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 45 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 46 PetscFunctionReturn(0); 47 } 48 49 #undef __FUNCT__ 50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 51 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 52 { 53 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 54 PetscInt n = a->mbs,i; 55 56 PetscFunctionBegin; 57 *nn = n; 58 if (!ia) PetscFunctionReturn(0); 59 60 if (oshift == 1) { 61 /* temporarily add 1 to i and j indices */ 62 PetscInt nz = a->i[n]; 63 for (i=0; i<nz; i++) a->j[i]++; 64 for (i=0; i<n+1; i++) a->i[i]++; 65 *ia = a->i; *ja = a->j; 66 } else { 67 *ia = a->i; *ja = a->j; 68 } 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 74 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 75 { 76 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 77 PetscInt i,n = a->mbs; 78 79 PetscFunctionBegin; 80 if (!ia) PetscFunctionReturn(0); 81 82 if (oshift == 1) { 83 PetscInt nz = a->i[n]-1; 84 for (i=0; i<nz; i++) a->j[i]--; 85 for (i=0; i<n+1; i++) a->i[i]--; 86 } 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 92 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A) 93 { 94 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 95 PetscErrorCode ierr; 96 97 PetscFunctionBegin; 98 PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->m,a->nz); 99 ierr = PetscFree(a->a);CHKERRQ(ierr); 100 if (!a->singlemalloc) { 101 ierr = PetscFree(a->i);CHKERRQ(ierr); 102 ierr = PetscFree(a->j);CHKERRQ(ierr); 103 } 104 if (a->row) { 105 ierr = ISDestroy(a->row);CHKERRQ(ierr); 106 } 107 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 108 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 109 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 110 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 111 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 112 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 113 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 114 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 115 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 116 117 if (a->inew){ 118 ierr = PetscFree(a->inew);CHKERRQ(ierr); 119 a->inew = 0; 120 } 121 ierr = PetscFree(a);CHKERRQ(ierr); 122 123 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 124 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 125 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 126 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 127 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr); 128 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 129 PetscFunctionReturn(0); 130 } 131 132 #undef __FUNCT__ 133 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 134 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op) 135 { 136 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 137 138 PetscFunctionBegin; 139 switch (op) { 140 case MAT_ROW_ORIENTED: 141 a->roworiented = PETSC_TRUE; 142 break; 143 case MAT_COLUMN_ORIENTED: 144 a->roworiented = PETSC_FALSE; 145 break; 146 case MAT_COLUMNS_SORTED: 147 a->sorted = PETSC_TRUE; 148 break; 149 case MAT_COLUMNS_UNSORTED: 150 a->sorted = PETSC_FALSE; 151 break; 152 case MAT_KEEP_ZEROED_ROWS: 153 a->keepzeroedrows = PETSC_TRUE; 154 break; 155 case MAT_NO_NEW_NONZERO_LOCATIONS: 156 a->nonew = 1; 157 break; 158 case MAT_NEW_NONZERO_LOCATION_ERR: 159 a->nonew = -1; 160 break; 161 case MAT_NEW_NONZERO_ALLOCATION_ERR: 162 a->nonew = -2; 163 break; 164 case MAT_YES_NEW_NONZERO_LOCATIONS: 165 a->nonew = 0; 166 break; 167 case MAT_ROWS_SORTED: 168 case MAT_ROWS_UNSORTED: 169 case MAT_YES_NEW_DIAGONALS: 170 case MAT_IGNORE_OFF_PROC_ENTRIES: 171 case MAT_USE_HASH_TABLE: 172 PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n"); 173 break; 174 case MAT_NO_NEW_DIAGONALS: 175 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 176 case MAT_NOT_SYMMETRIC: 177 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 178 case MAT_HERMITIAN: 179 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 180 case MAT_SYMMETRIC: 181 case MAT_STRUCTURALLY_SYMMETRIC: 182 case MAT_NOT_HERMITIAN: 183 case MAT_SYMMETRY_ETERNAL: 184 case MAT_NOT_SYMMETRY_ETERNAL: 185 break; 186 default: 187 SETERRQ(PETSC_ERR_SUP,"unknown option"); 188 } 189 PetscFunctionReturn(0); 190 } 191 192 #undef __FUNCT__ 193 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 194 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v) 195 { 196 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 197 PetscErrorCode ierr; 198 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2; 199 MatScalar *aa,*aa_i; 200 PetscScalar *v_i; 201 202 PetscFunctionBegin; 203 bs = A->bs; 204 ai = a->i; 205 aj = a->j; 206 aa = a->a; 207 bs2 = a->bs2; 208 209 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row); 210 211 bn = row/bs; /* Block number */ 212 bp = row % bs; /* Block position */ 213 M = ai[bn+1] - ai[bn]; 214 *ncols = bs*M; 215 216 if (v) { 217 *v = 0; 218 if (*ncols) { 219 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 220 for (i=0; i<M; i++) { /* for each block in the block row */ 221 v_i = *v + i*bs; 222 aa_i = aa + bs2*(ai[bn] + i); 223 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 224 } 225 } 226 } 227 228 if (cols) { 229 *cols = 0; 230 if (*ncols) { 231 ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr); 232 for (i=0; i<M; i++) { /* for each block in the block row */ 233 cols_i = *cols + i*bs; 234 itmp = bs*aj[ai[bn] + i]; 235 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 236 } 237 } 238 } 239 240 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 241 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 242 #ifdef column_search 243 v_i = *v + M*bs; 244 cols_i = *cols + M*bs; 245 for (i=0; i<bn; i++){ /* for each block row */ 246 M = ai[i+1] - ai[i]; 247 for (j=0; j<M; j++){ 248 itmp = aj[ai[i] + j]; /* block column value */ 249 if (itmp == bn){ 250 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 251 for (k=0; k<bs; k++) { 252 *cols_i++ = i*bs+k; 253 *v_i++ = aa_i[k]; 254 } 255 *ncols += bs; 256 break; 257 } 258 } 259 } 260 #endif 261 262 PetscFunctionReturn(0); 263 } 264 265 #undef __FUNCT__ 266 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 267 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 268 { 269 PetscErrorCode ierr; 270 271 PetscFunctionBegin; 272 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 273 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 274 PetscFunctionReturn(0); 275 } 276 277 #undef __FUNCT__ 278 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 279 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B) 280 { 281 PetscErrorCode ierr; 282 PetscFunctionBegin; 283 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 284 PetscFunctionReturn(0); 285 } 286 287 #undef __FUNCT__ 288 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 289 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 290 { 291 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 292 PetscErrorCode ierr; 293 PetscInt i,j,bs = A->bs,k,l,bs2=a->bs2; 294 char *name; 295 PetscViewerFormat format; 296 297 PetscFunctionBegin; 298 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 299 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 300 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 301 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 302 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 303 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 304 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 305 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 306 for (i=0; i<a->mbs; i++) { 307 for (j=0; j<bs; j++) { 308 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 309 for (k=a->i[i]; k<a->i[i+1]; k++) { 310 for (l=0; l<bs; l++) { 311 #if defined(PETSC_USE_COMPLEX) 312 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 313 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 314 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 315 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 316 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 317 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 318 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 319 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 320 } 321 #else 322 if (a->a[bs2*k + l*bs + j] != 0.0) { 323 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 324 } 325 #endif 326 } 327 } 328 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 329 } 330 } 331 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 332 } else { 333 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 334 for (i=0; i<a->mbs; i++) { 335 for (j=0; j<bs; j++) { 336 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 337 for (k=a->i[i]; k<a->i[i+1]; k++) { 338 for (l=0; l<bs; l++) { 339 #if defined(PETSC_USE_COMPLEX) 340 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 341 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 342 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 343 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 344 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 345 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 346 } else { 347 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 348 } 349 #else 350 ierr = PetscViewerASCIIPrintf(viewer," %D %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 351 #endif 352 } 353 } 354 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 355 } 356 } 357 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 358 } 359 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 360 PetscFunctionReturn(0); 361 } 362 363 #undef __FUNCT__ 364 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 365 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 366 { 367 Mat A = (Mat) Aa; 368 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 369 PetscErrorCode ierr; 370 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2; 371 PetscMPIInt rank; 372 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 373 MatScalar *aa; 374 MPI_Comm comm; 375 PetscViewer viewer; 376 377 PetscFunctionBegin; 378 /* 379 This is nasty. If this is called from an originally parallel matrix 380 then all processes call this,but only the first has the matrix so the 381 rest should return immediately. 382 */ 383 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 384 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 385 if (rank) PetscFunctionReturn(0); 386 387 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 388 389 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 390 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 391 392 /* loop over matrix elements drawing boxes */ 393 color = PETSC_DRAW_BLUE; 394 for (i=0,row=0; i<mbs; i++,row+=bs) { 395 for (j=a->i[i]; j<a->i[i+1]; j++) { 396 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 397 x_l = a->j[j]*bs; x_r = x_l + 1.0; 398 aa = a->a + j*bs2; 399 for (k=0; k<bs; k++) { 400 for (l=0; l<bs; l++) { 401 if (PetscRealPart(*aa++) >= 0.) continue; 402 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 403 } 404 } 405 } 406 } 407 color = PETSC_DRAW_CYAN; 408 for (i=0,row=0; i<mbs; i++,row+=bs) { 409 for (j=a->i[i]; j<a->i[i+1]; j++) { 410 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 411 x_l = a->j[j]*bs; x_r = x_l + 1.0; 412 aa = a->a + j*bs2; 413 for (k=0; k<bs; k++) { 414 for (l=0; l<bs; l++) { 415 if (PetscRealPart(*aa++) != 0.) continue; 416 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 417 } 418 } 419 } 420 } 421 422 color = PETSC_DRAW_RED; 423 for (i=0,row=0; i<mbs; i++,row+=bs) { 424 for (j=a->i[i]; j<a->i[i+1]; j++) { 425 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 426 x_l = a->j[j]*bs; x_r = x_l + 1.0; 427 aa = a->a + j*bs2; 428 for (k=0; k<bs; k++) { 429 for (l=0; l<bs; l++) { 430 if (PetscRealPart(*aa++) <= 0.) continue; 431 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 432 } 433 } 434 } 435 } 436 PetscFunctionReturn(0); 437 } 438 439 #undef __FUNCT__ 440 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 441 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 442 { 443 PetscErrorCode ierr; 444 PetscReal xl,yl,xr,yr,w,h; 445 PetscDraw draw; 446 PetscTruth isnull; 447 448 PetscFunctionBegin; 449 450 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 451 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 452 453 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 454 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 455 xr += w; yr += h; xl = -w; yl = -h; 456 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 457 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 458 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 459 PetscFunctionReturn(0); 460 } 461 462 #undef __FUNCT__ 463 #define __FUNCT__ "MatView_SeqSBAIJ" 464 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 465 { 466 PetscErrorCode ierr; 467 PetscTruth iascii,isdraw; 468 469 PetscFunctionBegin; 470 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 471 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 472 if (iascii){ 473 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 474 } else if (isdraw) { 475 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 476 } else { 477 Mat B; 478 ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr); 479 ierr = MatView(B,viewer);CHKERRQ(ierr); 480 ierr = MatDestroy(B);CHKERRQ(ierr); 481 } 482 PetscFunctionReturn(0); 483 } 484 485 486 #undef __FUNCT__ 487 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 488 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 489 { 490 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 491 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 492 PetscInt *ai = a->i,*ailen = a->ilen; 493 PetscInt brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2; 494 MatScalar *ap,*aa = a->a,zero = 0.0; 495 496 PetscFunctionBegin; 497 for (k=0; k<m; k++) { /* loop over rows */ 498 row = im[k]; brow = row/bs; 499 if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row); 500 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 501 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 502 nrow = ailen[brow]; 503 for (l=0; l<n; l++) { /* loop over columns */ 504 if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]); 505 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1); 506 col = in[l] ; 507 bcol = col/bs; 508 cidx = col%bs; 509 ridx = row%bs; 510 high = nrow; 511 low = 0; /* assume unsorted */ 512 while (high-low > 5) { 513 t = (low+high)/2; 514 if (rp[t] > bcol) high = t; 515 else low = t; 516 } 517 for (i=low; i<high; i++) { 518 if (rp[i] > bcol) break; 519 if (rp[i] == bcol) { 520 *v++ = ap[bs2*i+bs*cidx+ridx]; 521 goto finished; 522 } 523 } 524 *v++ = zero; 525 finished:; 526 } 527 } 528 PetscFunctionReturn(0); 529 } 530 531 532 #undef __FUNCT__ 533 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 534 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 535 { 536 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 537 PetscErrorCode ierr; 538 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 539 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 540 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval; 541 PetscTruth roworiented=a->roworiented; 542 const MatScalar *value = v; 543 MatScalar *ap,*aa = a->a,*bap; 544 545 PetscFunctionBegin; 546 if (roworiented) { 547 stepval = (n-1)*bs; 548 } else { 549 stepval = (m-1)*bs; 550 } 551 for (k=0; k<m; k++) { /* loop over added rows */ 552 row = im[k]; 553 if (row < 0) continue; 554 #if defined(PETSC_USE_DEBUG) 555 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 556 #endif 557 rp = aj + ai[row]; 558 ap = aa + bs2*ai[row]; 559 rmax = imax[row]; 560 nrow = ailen[row]; 561 low = 0; 562 high = nrow; 563 for (l=0; l<n; l++) { /* loop over added columns */ 564 if (in[l] < 0) continue; 565 col = in[l]; 566 #if defined(PETSC_USE_DEBUG) 567 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1); 568 #endif 569 if (col < row) continue; /* ignore lower triangular block */ 570 if (roworiented) { 571 value = v + k*(stepval+bs)*bs + l*bs; 572 } else { 573 value = v + l*(stepval+bs)*bs + k*bs; 574 } 575 if (col < lastcol) low = 0; else high = nrow; 576 lastcol = col; 577 while (high-low > 7) { 578 t = (low+high)/2; 579 if (rp[t] > col) high = t; 580 else low = t; 581 } 582 for (i=low; i<high; i++) { 583 if (rp[i] > col) break; 584 if (rp[i] == col) { 585 bap = ap + bs2*i; 586 if (roworiented) { 587 if (is == ADD_VALUES) { 588 for (ii=0; ii<bs; ii++,value+=stepval) { 589 for (jj=ii; jj<bs2; jj+=bs) { 590 bap[jj] += *value++; 591 } 592 } 593 } else { 594 for (ii=0; ii<bs; ii++,value+=stepval) { 595 for (jj=ii; jj<bs2; jj+=bs) { 596 bap[jj] = *value++; 597 } 598 } 599 } 600 } else { 601 if (is == ADD_VALUES) { 602 for (ii=0; ii<bs; ii++,value+=stepval) { 603 for (jj=0; jj<bs; jj++) { 604 *bap++ += *value++; 605 } 606 } 607 } else { 608 for (ii=0; ii<bs; ii++,value+=stepval) { 609 for (jj=0; jj<bs; jj++) { 610 *bap++ = *value++; 611 } 612 } 613 } 614 } 615 goto noinsert2; 616 } 617 } 618 if (nonew == 1) goto noinsert2; 619 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 620 if (nrow >= rmax) { 621 /* there is no extra room in row, therefore enlarge */ 622 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 623 MatScalar *new_a; 624 625 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 626 627 /* malloc new storage space */ 628 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); 629 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 630 new_j = (PetscInt*)(new_a + bs2*new_nz); 631 new_i = new_j + new_nz; 632 633 /* copy over old data into new slots */ 634 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 635 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 636 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 637 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 638 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 639 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 640 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 641 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 642 /* free up old matrix storage */ 643 ierr = PetscFree(a->a);CHKERRQ(ierr); 644 if (!a->singlemalloc) { 645 ierr = PetscFree(a->i);CHKERRQ(ierr); 646 ierr = PetscFree(a->j);CHKERRQ(ierr); 647 } 648 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 649 a->singlemalloc = PETSC_TRUE; 650 651 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 652 rmax = imax[row] = imax[row] + CHUNKSIZE; 653 ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); 654 a->maxnz += bs2*CHUNKSIZE; 655 a->reallocs++; 656 a->nz++; 657 } 658 N = nrow++ - 1; 659 /* shift up all the later entries in this row */ 660 for (ii=N; ii>=i; ii--) { 661 rp[ii+1] = rp[ii]; 662 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 663 } 664 if (N >= i) { 665 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 666 } 667 rp[i] = col; 668 bap = ap + bs2*i; 669 if (roworiented) { 670 for (ii=0; ii<bs; ii++,value+=stepval) { 671 for (jj=ii; jj<bs2; jj+=bs) { 672 bap[jj] = *value++; 673 } 674 } 675 } else { 676 for (ii=0; ii<bs; ii++,value+=stepval) { 677 for (jj=0; jj<bs; jj++) { 678 *bap++ = *value++; 679 } 680 } 681 } 682 noinsert2:; 683 low = i; 684 } 685 ailen[row] = nrow; 686 } 687 PetscFunctionReturn(0); 688 } 689 690 #undef __FUNCT__ 691 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 692 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 693 { 694 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 695 PetscErrorCode ierr; 696 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 697 PetscInt m = A->m,*ip,N,*ailen = a->ilen; 698 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 699 MatScalar *aa = a->a,*ap; 700 701 PetscFunctionBegin; 702 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 703 704 if (m) rmax = ailen[0]; 705 for (i=1; i<mbs; i++) { 706 /* move each row back by the amount of empty slots (fshift) before it*/ 707 fshift += imax[i-1] - ailen[i-1]; 708 rmax = PetscMax(rmax,ailen[i]); 709 if (fshift) { 710 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 711 N = ailen[i]; 712 for (j=0; j<N; j++) { 713 ip[j-fshift] = ip[j]; 714 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 715 } 716 } 717 ai[i] = ai[i-1] + ailen[i-1]; 718 } 719 if (mbs) { 720 fshift += imax[mbs-1] - ailen[mbs-1]; 721 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 722 } 723 /* reset ilen and imax for each row */ 724 for (i=0; i<mbs; i++) { 725 ailen[i] = imax[i] = ai[i+1] - ai[i]; 726 } 727 a->nz = ai[mbs]; 728 729 /* diagonals may have moved, reset it */ 730 if (a->diag) { 731 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 732 } 733 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n", 734 m,A->m,A->bs,fshift*bs2,a->nz*bs2); 735 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %D\n", 736 a->reallocs); 737 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %D\n",rmax); 738 a->reallocs = 0; 739 A->info.nz_unneeded = (PetscReal)fshift*bs2; 740 741 PetscFunctionReturn(0); 742 } 743 744 /* 745 This function returns an array of flags which indicate the locations of contiguous 746 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 747 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 748 Assume: sizes should be long enough to hold all the values. 749 */ 750 #undef __FUNCT__ 751 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 752 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 753 { 754 PetscInt i,j,k,row; 755 PetscTruth flg; 756 757 PetscFunctionBegin; 758 for (i=0,j=0; i<n; j++) { 759 row = idx[i]; 760 if (row%bs!=0) { /* Not the begining of a block */ 761 sizes[j] = 1; 762 i++; 763 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 764 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 765 i++; 766 } else { /* Begining of the block, so check if the complete block exists */ 767 flg = PETSC_TRUE; 768 for (k=1; k<bs; k++) { 769 if (row+k != idx[i+k]) { /* break in the block */ 770 flg = PETSC_FALSE; 771 break; 772 } 773 } 774 if (flg) { /* No break in the bs */ 775 sizes[j] = bs; 776 i+= bs; 777 } else { 778 sizes[j] = 1; 779 i++; 780 } 781 } 782 } 783 *bs_max = j; 784 PetscFunctionReturn(0); 785 } 786 787 788 /* Only add/insert a(i,j) with i<=j (blocks). 789 Any a(i,j) with i>j input by user is ingored. 790 */ 791 792 #undef __FUNCT__ 793 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 794 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 795 { 796 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 797 PetscErrorCode ierr; 798 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 799 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 800 PetscInt *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol; 801 PetscInt ridx,cidx,bs2=a->bs2; 802 MatScalar *ap,value,*aa=a->a,*bap; 803 804 PetscFunctionBegin; 805 806 for (k=0; k<m; k++) { /* loop over added rows */ 807 row = im[k]; /* row number */ 808 brow = row/bs; /* block row number */ 809 if (row < 0) continue; 810 #if defined(PETSC_USE_DEBUG) 811 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 812 #endif 813 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 814 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 815 rmax = imax[brow]; /* maximum space allocated for this row */ 816 nrow = ailen[brow]; /* actual length of this row */ 817 low = 0; 818 high = nrow; 819 820 for (l=0; l<n; l++) { /* loop over added columns */ 821 if (in[l] < 0) continue; 822 #if defined(PETSC_USE_DEBUG) 823 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->m-1); 824 #endif 825 col = in[l]; 826 bcol = col/bs; /* block col number */ 827 828 if (brow <= bcol){ 829 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 830 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 831 /* element value a(k,l) */ 832 if (roworiented) { 833 value = v[l + k*n]; 834 } else { 835 value = v[k + l*m]; 836 } 837 838 /* move pointer bap to a(k,l) quickly and add/insert value */ 839 if (col < lastcol) low = 0; else high = nrow; 840 lastcol = col; 841 while (high-low > 7) { 842 t = (low+high)/2; 843 if (rp[t] > bcol) high = t; 844 else low = t; 845 } 846 for (i=low; i<high; i++) { 847 /* printf("The loop of i=low.., rp[%D]=%D\n",i,rp[i]); */ 848 if (rp[i] > bcol) break; 849 if (rp[i] == bcol) { 850 bap = ap + bs2*i + bs*cidx + ridx; 851 if (is == ADD_VALUES) *bap += value; 852 else *bap = value; 853 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 854 if (brow == bcol && ridx < cidx){ 855 bap = ap + bs2*i + bs*ridx + cidx; 856 if (is == ADD_VALUES) *bap += value; 857 else *bap = value; 858 } 859 goto noinsert1; 860 } 861 } 862 863 if (nonew == 1) goto noinsert1; 864 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 865 if (nrow >= rmax) { 866 /* there is no extra room in row, therefore enlarge */ 867 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 868 MatScalar *new_a; 869 870 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 871 872 /* Malloc new storage space */ 873 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); 874 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 875 new_j = (PetscInt*)(new_a + bs2*new_nz); 876 new_i = new_j + new_nz; 877 878 /* copy over old data into new slots */ 879 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 880 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 881 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 882 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 883 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 884 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 885 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 886 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 887 /* free up old matrix storage */ 888 ierr = PetscFree(a->a);CHKERRQ(ierr); 889 if (!a->singlemalloc) { 890 ierr = PetscFree(a->i);CHKERRQ(ierr); 891 ierr = PetscFree(a->j);CHKERRQ(ierr); 892 } 893 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 894 a->singlemalloc = PETSC_TRUE; 895 896 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 897 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 898 ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); 899 a->maxnz += bs2*CHUNKSIZE; 900 a->reallocs++; 901 a->nz++; 902 } 903 904 N = nrow++ - 1; 905 /* shift up all the later entries in this row */ 906 for (ii=N; ii>=i; ii--) { 907 rp[ii+1] = rp[ii]; 908 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 909 } 910 if (N>=i) { 911 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 912 } 913 rp[i] = bcol; 914 ap[bs2*i + bs*cidx + ridx] = value; 915 noinsert1:; 916 low = i; 917 /* } */ 918 } 919 } /* end of if .. if.. */ 920 } /* end of loop over added columns */ 921 ailen[brow] = nrow; 922 } /* end of loop over added rows */ 923 924 PetscFunctionReturn(0); 925 } 926 927 EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*); 928 EXTERN PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*); 929 EXTERN PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat,PetscInt,IS[],PetscInt); 930 EXTERN PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,PetscInt,MatReuse,Mat*); 931 EXTERN PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat*[]); 932 EXTERN PetscErrorCode MatScale_SeqSBAIJ(const PetscScalar*,Mat); 933 EXTERN PetscErrorCode MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 934 EXTERN PetscErrorCode MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 935 EXTERN PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat,Vec); 936 EXTERN PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 937 EXTERN PetscErrorCode MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 938 EXTERN PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat); 939 EXTERN PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat,Vec); 940 941 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 942 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 943 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 944 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 945 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 946 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 947 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 948 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 949 950 EXTERN PetscErrorCode MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs); 951 952 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 953 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 954 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 955 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 956 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 957 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 958 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 959 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 960 961 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,MatFactorInfo*,Mat*); 962 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,MatFactorInfo*,Mat*); 963 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,MatFactorInfo*,Mat*); 964 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,MatFactorInfo*,Mat*); 965 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,MatFactorInfo*,Mat*); 966 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,MatFactorInfo*,Mat*); 967 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,MatFactorInfo*,Mat*); 968 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,MatFactorInfo*,Mat*); 969 EXTERN PetscErrorCode MatGetInertia_SeqSBAIJ(Mat,PetscInt*,PetscInt*,PetscInt*); 970 971 EXTERN PetscErrorCode MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 972 EXTERN PetscErrorCode MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 973 EXTERN PetscErrorCode MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 974 EXTERN PetscErrorCode MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 975 EXTERN PetscErrorCode MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 976 EXTERN PetscErrorCode MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 977 EXTERN PetscErrorCode MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 978 EXTERN PetscErrorCode MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 979 980 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 981 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 982 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 983 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 984 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 985 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 986 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 987 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 988 989 #ifdef HAVE_MatICCFactor 990 /* modified from MatILUFactor_SeqSBAIJ, needs further work! */ 991 #undef __FUNCT__ 992 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 993 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 994 { 995 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 996 Mat outA; 997 PetscErrorCode ierr; 998 PetscTruth row_identity,col_identity; 999 1000 PetscFunctionBegin; 1001 outA = inA; 1002 inA->factor = FACTOR_CHOLESKY; 1003 1004 if (!a->diag) { 1005 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 1006 } 1007 /* 1008 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1009 for ILU(0) factorization with natural ordering 1010 */ 1011 switch (a->bs) { 1012 case 1: 1013 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1014 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1015 inA->ops->solves = MatSolves_SeqSBAIJ_1; 1016 PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1017 case 2: 1018 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1019 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1020 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1021 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1022 break; 1023 case 3: 1024 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1025 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1026 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1027 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1028 break; 1029 case 4: 1030 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1031 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1032 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1033 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1034 break; 1035 case 5: 1036 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1037 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1038 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1039 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1040 break; 1041 case 6: 1042 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1043 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1044 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1045 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1046 break; 1047 case 7: 1048 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1049 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1050 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1051 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1052 break; 1053 default: 1054 a->row = row; 1055 a->icol = col; 1056 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1057 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1058 1059 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1060 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1061 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 1062 1063 if (!a->solve_work) { 1064 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1065 ierr = PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1066 } 1067 } 1068 1069 ierr = MatCholeskyFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1070 1071 PetscFunctionReturn(0); 1072 } 1073 #endif 1074 1075 #undef __FUNCT__ 1076 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1077 PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat A) 1078 { 1079 static PetscTruth called = PETSC_FALSE; 1080 MPI_Comm comm = A->comm; 1081 PetscErrorCode ierr; 1082 1083 PetscFunctionBegin; 1084 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1085 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1086 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1087 PetscFunctionReturn(0); 1088 } 1089 1090 EXTERN_C_BEGIN 1091 #undef __FUNCT__ 1092 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1093 PetscErrorCode MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices) 1094 { 1095 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1096 PetscInt i,nz,n; 1097 1098 PetscFunctionBegin; 1099 nz = baij->maxnz; 1100 n = mat->n; 1101 for (i=0; i<nz; i++) { 1102 baij->j[i] = indices[i]; 1103 } 1104 baij->nz = nz; 1105 for (i=0; i<n; i++) { 1106 baij->ilen[i] = baij->imax[i]; 1107 } 1108 1109 PetscFunctionReturn(0); 1110 } 1111 EXTERN_C_END 1112 1113 #undef __FUNCT__ 1114 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1115 /*@ 1116 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1117 in the matrix. 1118 1119 Input Parameters: 1120 + mat - the SeqSBAIJ matrix 1121 - indices - the column indices 1122 1123 Level: advanced 1124 1125 Notes: 1126 This can be called if you have precomputed the nonzero structure of the 1127 matrix and want to provide it to the matrix object to improve the performance 1128 of the MatSetValues() operation. 1129 1130 You MUST have set the correct numbers of nonzeros per row in the call to 1131 MatCreateSeqSBAIJ(). 1132 1133 MUST be called before any calls to MatSetValues() 1134 1135 .seealso: MatCreateSeqSBAIJ 1136 @*/ 1137 PetscErrorCode MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1138 { 1139 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1140 1141 PetscFunctionBegin; 1142 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1143 PetscValidPointer(indices,2); 1144 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1145 if (f) { 1146 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1147 } else { 1148 SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices"); 1149 } 1150 PetscFunctionReturn(0); 1151 } 1152 1153 #undef __FUNCT__ 1154 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1155 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A) 1156 { 1157 PetscErrorCode ierr; 1158 1159 PetscFunctionBegin; 1160 ierr = MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1161 PetscFunctionReturn(0); 1162 } 1163 1164 #undef __FUNCT__ 1165 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1166 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1167 { 1168 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1169 PetscFunctionBegin; 1170 *array = a->a; 1171 PetscFunctionReturn(0); 1172 } 1173 1174 #undef __FUNCT__ 1175 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1176 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1177 { 1178 PetscFunctionBegin; 1179 PetscFunctionReturn(0); 1180 } 1181 1182 #include "petscblaslapack.h" 1183 #undef __FUNCT__ 1184 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1185 PetscErrorCode MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1186 { 1187 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1188 PetscErrorCode ierr; 1189 PetscInt i,bs=Y->bs,bs2,j; 1190 PetscBLASInt bnz = (PetscBLASInt)x->nz,one = 1; 1191 1192 PetscFunctionBegin; 1193 if (str == SAME_NONZERO_PATTERN) { 1194 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1195 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1196 if (y->xtoy && y->XtoY != X) { 1197 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1198 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1199 } 1200 if (!y->xtoy) { /* get xtoy */ 1201 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1202 y->XtoY = X; 1203 } 1204 bs2 = bs*bs; 1205 for (i=0; i<x->nz; i++) { 1206 j = 0; 1207 while (j < bs2){ 1208 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1209 j++; 1210 } 1211 } 1212 PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)); 1213 } else { 1214 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1215 } 1216 PetscFunctionReturn(0); 1217 } 1218 1219 #undef __FUNCT__ 1220 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1221 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1222 { 1223 PetscFunctionBegin; 1224 *flg = PETSC_TRUE; 1225 PetscFunctionReturn(0); 1226 } 1227 1228 #undef __FUNCT__ 1229 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1230 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1231 { 1232 PetscFunctionBegin; 1233 *flg = PETSC_TRUE; 1234 PetscFunctionReturn(0); 1235 } 1236 1237 #undef __FUNCT__ 1238 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1239 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg) 1240 { 1241 PetscFunctionBegin; 1242 *flg = PETSC_FALSE; 1243 PetscFunctionReturn(0); 1244 } 1245 1246 /* -------------------------------------------------------------------*/ 1247 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1248 MatGetRow_SeqSBAIJ, 1249 MatRestoreRow_SeqSBAIJ, 1250 MatMult_SeqSBAIJ_N, 1251 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1252 MatMult_SeqSBAIJ_N, 1253 MatMultAdd_SeqSBAIJ_N, 1254 MatSolve_SeqSBAIJ_N, 1255 0, 1256 0, 1257 /*10*/ 0, 1258 0, 1259 MatCholeskyFactor_SeqSBAIJ, 1260 MatRelax_SeqSBAIJ, 1261 MatTranspose_SeqSBAIJ, 1262 /*15*/ MatGetInfo_SeqSBAIJ, 1263 MatEqual_SeqSBAIJ, 1264 MatGetDiagonal_SeqSBAIJ, 1265 MatDiagonalScale_SeqSBAIJ, 1266 MatNorm_SeqSBAIJ, 1267 /*20*/ 0, 1268 MatAssemblyEnd_SeqSBAIJ, 1269 0, 1270 MatSetOption_SeqSBAIJ, 1271 MatZeroEntries_SeqSBAIJ, 1272 /*25*/ 0, 1273 0, 1274 0, 1275 MatCholeskyFactorSymbolic_SeqSBAIJ, 1276 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1277 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1278 0, 1279 MatICCFactorSymbolic_SeqSBAIJ, 1280 MatGetArray_SeqSBAIJ, 1281 MatRestoreArray_SeqSBAIJ, 1282 /*35*/ MatDuplicate_SeqSBAIJ, 1283 0, 1284 0, 1285 0, 1286 0, 1287 /*40*/ MatAXPY_SeqSBAIJ, 1288 MatGetSubMatrices_SeqSBAIJ, 1289 MatIncreaseOverlap_SeqSBAIJ, 1290 MatGetValues_SeqSBAIJ, 1291 0, 1292 /*45*/ MatPrintHelp_SeqSBAIJ, 1293 MatScale_SeqSBAIJ, 1294 0, 1295 0, 1296 0, 1297 /*50*/ 0, 1298 MatGetRowIJ_SeqSBAIJ, 1299 MatRestoreRowIJ_SeqSBAIJ, 1300 0, 1301 0, 1302 /*55*/ 0, 1303 0, 1304 0, 1305 0, 1306 MatSetValuesBlocked_SeqSBAIJ, 1307 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1308 0, 1309 0, 1310 MatGetPetscMaps_Petsc, 1311 0, 1312 /*65*/ 0, 1313 0, 1314 0, 1315 0, 1316 0, 1317 /*70*/ MatGetRowMax_SeqSBAIJ, 1318 0, 1319 0, 1320 0, 1321 0, 1322 /*75*/ 0, 1323 0, 1324 0, 1325 0, 1326 0, 1327 /*80*/ 0, 1328 0, 1329 0, 1330 #if !defined(PETSC_USE_COMPLEX) 1331 MatGetInertia_SeqSBAIJ, 1332 #else 1333 0, 1334 #endif 1335 MatLoad_SeqSBAIJ, 1336 /*85*/ MatIsSymmetric_SeqSBAIJ, 1337 MatIsHermitian_SeqSBAIJ, 1338 MatIsStructurallySymmetric_SeqSBAIJ, 1339 0, 1340 0, 1341 /*90*/ 0, 1342 0, 1343 0, 1344 0, 1345 0, 1346 /*95*/ 0, 1347 0, 1348 0, 1349 0}; 1350 1351 EXTERN_C_BEGIN 1352 #undef __FUNCT__ 1353 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1354 PetscErrorCode MatStoreValues_SeqSBAIJ(Mat mat) 1355 { 1356 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1357 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 1358 PetscErrorCode ierr; 1359 1360 PetscFunctionBegin; 1361 if (aij->nonew != 1) { 1362 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1363 } 1364 1365 /* allocate space for values if not already there */ 1366 if (!aij->saved_values) { 1367 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1368 } 1369 1370 /* copy values over */ 1371 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1372 PetscFunctionReturn(0); 1373 } 1374 EXTERN_C_END 1375 1376 EXTERN_C_BEGIN 1377 #undef __FUNCT__ 1378 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1379 PetscErrorCode MatRetrieveValues_SeqSBAIJ(Mat mat) 1380 { 1381 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1382 PetscErrorCode ierr; 1383 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 1384 1385 PetscFunctionBegin; 1386 if (aij->nonew != 1) { 1387 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1388 } 1389 if (!aij->saved_values) { 1390 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1391 } 1392 1393 /* copy values over */ 1394 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1395 PetscFunctionReturn(0); 1396 } 1397 EXTERN_C_END 1398 1399 EXTERN_C_BEGIN 1400 #undef __FUNCT__ 1401 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1402 PetscErrorCode MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 1403 { 1404 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1405 PetscErrorCode ierr; 1406 PetscInt i,len,mbs,bs2; 1407 PetscTruth flg; 1408 1409 PetscFunctionBegin; 1410 B->preallocated = PETSC_TRUE; 1411 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1412 mbs = B->m/bs; 1413 bs2 = bs*bs; 1414 1415 if (mbs*bs != B->m) { 1416 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1417 } 1418 1419 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1420 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 1421 if (nnz) { 1422 for (i=0; i<mbs; i++) { 1423 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 1424 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs); 1425 } 1426 } 1427 1428 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1429 if (!flg) { 1430 switch (bs) { 1431 case 1: 1432 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1433 B->ops->solve = MatSolve_SeqSBAIJ_1; 1434 B->ops->solves = MatSolves_SeqSBAIJ_1; 1435 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1436 B->ops->mult = MatMult_SeqSBAIJ_1; 1437 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1438 break; 1439 case 2: 1440 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1441 B->ops->solve = MatSolve_SeqSBAIJ_2; 1442 B->ops->solvetranspose = MatSolve_SeqSBAIJ_2; 1443 B->ops->mult = MatMult_SeqSBAIJ_2; 1444 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1445 break; 1446 case 3: 1447 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1448 B->ops->solve = MatSolve_SeqSBAIJ_3; 1449 B->ops->solvetranspose = MatSolve_SeqSBAIJ_3; 1450 B->ops->mult = MatMult_SeqSBAIJ_3; 1451 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1452 break; 1453 case 4: 1454 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1455 B->ops->solve = MatSolve_SeqSBAIJ_4; 1456 B->ops->solvetranspose = MatSolve_SeqSBAIJ_4; 1457 B->ops->mult = MatMult_SeqSBAIJ_4; 1458 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1459 break; 1460 case 5: 1461 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1462 B->ops->solve = MatSolve_SeqSBAIJ_5; 1463 B->ops->solvetranspose = MatSolve_SeqSBAIJ_5; 1464 B->ops->mult = MatMult_SeqSBAIJ_5; 1465 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1466 break; 1467 case 6: 1468 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1469 B->ops->solve = MatSolve_SeqSBAIJ_6; 1470 B->ops->solvetranspose = MatSolve_SeqSBAIJ_6; 1471 B->ops->mult = MatMult_SeqSBAIJ_6; 1472 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1473 break; 1474 case 7: 1475 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1476 B->ops->solve = MatSolve_SeqSBAIJ_7; 1477 B->ops->solvetranspose = MatSolve_SeqSBAIJ_7; 1478 B->ops->mult = MatMult_SeqSBAIJ_7; 1479 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1480 break; 1481 } 1482 } 1483 1484 b->mbs = mbs; 1485 b->nbs = mbs; 1486 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->imax);CHKERRQ(ierr); 1487 if (!nnz) { 1488 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1489 else if (nz <= 0) nz = 1; 1490 for (i=0; i<mbs; i++) { 1491 b->imax[i] = nz; 1492 } 1493 nz = nz*mbs; /* total nz */ 1494 } else { 1495 nz = 0; 1496 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1497 } 1498 /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1499 1500 /* allocate the matrix space */ 1501 len = nz*sizeof(PetscInt) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(PetscInt); 1502 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1503 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1504 b->j = (PetscInt*)(b->a + nz*bs2); 1505 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1506 b->i = b->j + nz; 1507 b->singlemalloc = PETSC_TRUE; 1508 1509 /* pointer to beginning of each row */ 1510 b->i[0] = 0; 1511 for (i=1; i<mbs+1; i++) { 1512 b->i[i] = b->i[i-1] + b->imax[i-1]; 1513 } 1514 1515 /* b->ilen will count nonzeros in each block row so far. */ 1516 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->ilen);CHKERRQ(ierr); 1517 ierr = PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1518 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1519 1520 B->bs = bs; 1521 b->bs2 = bs2; 1522 b->nz = 0; 1523 b->maxnz = nz*bs2; 1524 1525 b->inew = 0; 1526 b->jnew = 0; 1527 b->anew = 0; 1528 b->a2anew = 0; 1529 b->permute = PETSC_FALSE; 1530 PetscFunctionReturn(0); 1531 } 1532 EXTERN_C_END 1533 1534 EXTERN_C_BEGIN 1535 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqAIJ(Mat,const MatType,Mat*); 1536 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqBAIJ(Mat,const MatType,Mat*); 1537 EXTERN_C_END 1538 1539 /*MC 1540 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1541 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1542 1543 Options Database Keys: 1544 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1545 1546 Level: beginner 1547 1548 .seealso: MatCreateSeqSBAIJ 1549 M*/ 1550 1551 EXTERN_C_BEGIN 1552 #undef __FUNCT__ 1553 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1554 PetscErrorCode MatCreate_SeqSBAIJ(Mat B) 1555 { 1556 Mat_SeqSBAIJ *b; 1557 PetscErrorCode ierr; 1558 PetscMPIInt size; 1559 1560 PetscFunctionBegin; 1561 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1562 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1563 B->m = B->M = PetscMax(B->m,B->M); 1564 B->n = B->N = PetscMax(B->n,B->N); 1565 1566 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1567 B->data = (void*)b; 1568 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1569 B->ops->destroy = MatDestroy_SeqSBAIJ; 1570 B->ops->view = MatView_SeqSBAIJ; 1571 B->factor = 0; 1572 B->lupivotthreshold = 1.0; 1573 B->mapping = 0; 1574 b->row = 0; 1575 b->icol = 0; 1576 b->reallocs = 0; 1577 b->saved_values = 0; 1578 1579 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1580 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1581 1582 b->sorted = PETSC_FALSE; 1583 b->roworiented = PETSC_TRUE; 1584 b->nonew = 0; 1585 b->diag = 0; 1586 b->solve_work = 0; 1587 b->mult_work = 0; 1588 B->spptr = 0; 1589 b->keepzeroedrows = PETSC_FALSE; 1590 b->xtoy = 0; 1591 b->XtoY = 0; 1592 1593 b->inew = 0; 1594 b->jnew = 0; 1595 b->anew = 0; 1596 b->a2anew = 0; 1597 b->permute = PETSC_FALSE; 1598 1599 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1600 "MatStoreValues_SeqSBAIJ", 1601 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1602 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1603 "MatRetrieveValues_SeqSBAIJ", 1604 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1605 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1606 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1607 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1608 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C", 1609 "MatConvert_SeqSBAIJ_SeqAIJ", 1610 MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr); 1611 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C", 1612 "MatConvert_SeqSBAIJ_SeqBAIJ", 1613 MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr); 1614 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1615 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1616 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1617 1618 B->symmetric = PETSC_TRUE; 1619 B->structurally_symmetric = PETSC_TRUE; 1620 B->symmetric_set = PETSC_TRUE; 1621 B->structurally_symmetric_set = PETSC_TRUE; 1622 PetscFunctionReturn(0); 1623 } 1624 EXTERN_C_END 1625 1626 #undef __FUNCT__ 1627 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1628 /*@C 1629 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1630 compressed row) format. For good matrix assembly performance the 1631 user should preallocate the matrix storage by setting the parameter nz 1632 (or the array nnz). By setting these parameters accurately, performance 1633 during matrix assembly can be increased by more than a factor of 50. 1634 1635 Collective on Mat 1636 1637 Input Parameters: 1638 + A - the symmetric matrix 1639 . bs - size of block 1640 . nz - number of block nonzeros per block row (same for all rows) 1641 - nnz - array containing the number of block nonzeros in the upper triangular plus 1642 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1643 1644 Options Database Keys: 1645 . -mat_no_unroll - uses code that does not unroll the loops in the 1646 block calculations (much slower) 1647 . -mat_block_size - size of the blocks to use 1648 1649 Level: intermediate 1650 1651 Notes: 1652 Specify the preallocated storage with either nz or nnz (not both). 1653 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1654 allocation. For additional details, see the users manual chapter on 1655 matrices. 1656 1657 If the nnz parameter is given then the nz parameter is ignored 1658 1659 1660 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1661 @*/ 1662 PetscErrorCode MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 1663 { 1664 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 1665 1666 PetscFunctionBegin; 1667 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1668 if (f) { 1669 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1670 } 1671 PetscFunctionReturn(0); 1672 } 1673 1674 #undef __FUNCT__ 1675 #define __FUNCT__ "MatCreateSeqSBAIJ" 1676 /*@C 1677 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1678 compressed row) format. For good matrix assembly performance the 1679 user should preallocate the matrix storage by setting the parameter nz 1680 (or the array nnz). By setting these parameters accurately, performance 1681 during matrix assembly can be increased by more than a factor of 50. 1682 1683 Collective on MPI_Comm 1684 1685 Input Parameters: 1686 + comm - MPI communicator, set to PETSC_COMM_SELF 1687 . bs - size of block 1688 . m - number of rows, or number of columns 1689 . nz - number of block nonzeros per block row (same for all rows) 1690 - nnz - array containing the number of block nonzeros in the upper triangular plus 1691 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1692 1693 Output Parameter: 1694 . A - the symmetric matrix 1695 1696 Options Database Keys: 1697 . -mat_no_unroll - uses code that does not unroll the loops in the 1698 block calculations (much slower) 1699 . -mat_block_size - size of the blocks to use 1700 1701 Level: intermediate 1702 1703 Notes: 1704 1705 Specify the preallocated storage with either nz or nnz (not both). 1706 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1707 allocation. For additional details, see the users manual chapter on 1708 matrices. 1709 1710 If the nnz parameter is given then the nz parameter is ignored 1711 1712 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1713 @*/ 1714 PetscErrorCode MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1715 { 1716 PetscErrorCode ierr; 1717 1718 PetscFunctionBegin; 1719 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1720 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1721 ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1722 PetscFunctionReturn(0); 1723 } 1724 1725 #undef __FUNCT__ 1726 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1727 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1728 { 1729 Mat C; 1730 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1731 PetscErrorCode ierr; 1732 PetscInt i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1733 1734 PetscFunctionBegin; 1735 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1736 1737 *B = 0; 1738 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1739 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1740 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1741 c = (Mat_SeqSBAIJ*)C->data; 1742 1743 C->preallocated = PETSC_TRUE; 1744 C->factor = A->factor; 1745 c->row = 0; 1746 c->icol = 0; 1747 c->saved_values = 0; 1748 c->keepzeroedrows = a->keepzeroedrows; 1749 C->assembled = PETSC_TRUE; 1750 1751 C->M = A->M; 1752 C->N = A->N; 1753 C->bs = A->bs; 1754 c->bs2 = a->bs2; 1755 c->mbs = a->mbs; 1756 c->nbs = a->nbs; 1757 1758 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr); 1759 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr); 1760 for (i=0; i<mbs; i++) { 1761 c->imax[i] = a->imax[i]; 1762 c->ilen[i] = a->ilen[i]; 1763 } 1764 1765 /* allocate the matrix space */ 1766 c->singlemalloc = PETSC_TRUE; 1767 len = (mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)); 1768 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1769 c->j = (PetscInt*)(c->a + nz*bs2); 1770 c->i = c->j + nz; 1771 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1772 if (mbs > 0) { 1773 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1774 if (cpvalues == MAT_COPY_VALUES) { 1775 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1776 } else { 1777 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1778 } 1779 } 1780 1781 ierr = PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1782 c->sorted = a->sorted; 1783 c->roworiented = a->roworiented; 1784 c->nonew = a->nonew; 1785 1786 if (a->diag) { 1787 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 1788 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1789 for (i=0; i<mbs; i++) { 1790 c->diag[i] = a->diag[i]; 1791 } 1792 } else c->diag = 0; 1793 c->nz = a->nz; 1794 c->maxnz = a->maxnz; 1795 c->solve_work = 0; 1796 c->mult_work = 0; 1797 *B = C; 1798 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1799 PetscFunctionReturn(0); 1800 } 1801 1802 #undef __FUNCT__ 1803 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1804 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer,const MatType type,Mat *A) 1805 { 1806 Mat_SeqSBAIJ *a; 1807 Mat B; 1808 PetscErrorCode ierr; 1809 int fd; 1810 PetscMPIInt size; 1811 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 1812 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1813 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 1814 PetscInt *masked,nmask,tmp,bs2,ishift; 1815 PetscScalar *aa; 1816 MPI_Comm comm = ((PetscObject)viewer)->comm; 1817 1818 PetscFunctionBegin; 1819 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1820 bs2 = bs*bs; 1821 1822 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1823 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1824 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1825 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1826 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1827 M = header[1]; N = header[2]; nz = header[3]; 1828 1829 if (header[3] < 0) { 1830 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1831 } 1832 1833 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1834 1835 /* 1836 This code adds extra rows to make sure the number of rows is 1837 divisible by the blocksize 1838 */ 1839 mbs = M/bs; 1840 extra_rows = bs - M + bs*(mbs); 1841 if (extra_rows == bs) extra_rows = 0; 1842 else mbs++; 1843 if (extra_rows) { 1844 PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"); 1845 } 1846 1847 /* read in row lengths */ 1848 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1849 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1850 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1851 1852 /* read in column indices */ 1853 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1854 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1855 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1856 1857 /* loop over row lengths determining block row lengths */ 1858 ierr = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr); 1859 ierr = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1860 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 1861 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1862 masked = mask + mbs; 1863 rowcount = 0; nzcount = 0; 1864 for (i=0; i<mbs; i++) { 1865 nmask = 0; 1866 for (j=0; j<bs; j++) { 1867 kmax = rowlengths[rowcount]; 1868 for (k=0; k<kmax; k++) { 1869 tmp = jj[nzcount++]/bs; /* block col. index */ 1870 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1871 } 1872 rowcount++; 1873 } 1874 s_browlengths[i] += nmask; 1875 1876 /* zero out the mask elements we set */ 1877 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1878 } 1879 1880 /* create our matrix */ 1881 ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr); 1882 ierr = MatSetType(B,type);CHKERRQ(ierr); 1883 ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr); 1884 a = (Mat_SeqSBAIJ*)B->data; 1885 1886 /* set matrix "i" values */ 1887 a->i[0] = 0; 1888 for (i=1; i<= mbs; i++) { 1889 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1890 a->ilen[i-1] = s_browlengths[i-1]; 1891 } 1892 a->nz = a->i[mbs]; 1893 1894 /* read in nonzero values */ 1895 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1896 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1897 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1898 1899 /* set "a" and "j" values into matrix */ 1900 nzcount = 0; jcount = 0; 1901 for (i=0; i<mbs; i++) { 1902 nzcountb = nzcount; 1903 nmask = 0; 1904 for (j=0; j<bs; j++) { 1905 kmax = rowlengths[i*bs+j]; 1906 for (k=0; k<kmax; k++) { 1907 tmp = jj[nzcount++]/bs; /* block col. index */ 1908 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1909 } 1910 } 1911 /* sort the masked values */ 1912 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1913 1914 /* set "j" values into matrix */ 1915 maskcount = 1; 1916 for (j=0; j<nmask; j++) { 1917 a->j[jcount++] = masked[j]; 1918 mask[masked[j]] = maskcount++; 1919 } 1920 1921 /* set "a" values into matrix */ 1922 ishift = bs2*a->i[i]; 1923 for (j=0; j<bs; j++) { 1924 kmax = rowlengths[i*bs+j]; 1925 for (k=0; k<kmax; k++) { 1926 tmp = jj[nzcountb]/bs ; /* block col. index */ 1927 if (tmp >= i){ 1928 block = mask[tmp] - 1; 1929 point = jj[nzcountb] - bs*tmp; 1930 idx = ishift + bs2*block + j + bs*point; 1931 a->a[idx] = aa[nzcountb]; 1932 } 1933 nzcountb++; 1934 } 1935 } 1936 /* zero out the mask elements we set */ 1937 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1938 } 1939 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1940 1941 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1942 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1943 ierr = PetscFree(aa);CHKERRQ(ierr); 1944 ierr = PetscFree(jj);CHKERRQ(ierr); 1945 ierr = PetscFree(mask);CHKERRQ(ierr); 1946 1947 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1948 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1949 ierr = MatView_Private(B);CHKERRQ(ierr); 1950 *A = B; 1951 PetscFunctionReturn(0); 1952 } 1953 1954 #undef __FUNCT__ 1955 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1956 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1957 { 1958 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1959 MatScalar *aa=a->a,*v,*v1; 1960 PetscScalar *x,*b,*t,sum,d; 1961 PetscErrorCode ierr; 1962 PetscInt m=a->mbs,bs=A->bs,*ai=a->i,*aj=a->j; 1963 PetscInt nz,nz1,*vj,*vj1,i; 1964 1965 PetscFunctionBegin; 1966 its = its*lits; 1967 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1968 1969 if (bs > 1) 1970 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1971 1972 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1973 if (xx != bb) { 1974 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1975 } else { 1976 b = x; 1977 } 1978 1979 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1980 1981 if (flag & SOR_ZERO_INITIAL_GUESS) { 1982 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1983 for (i=0; i<m; i++) 1984 t[i] = b[i]; 1985 1986 for (i=0; i<m; i++){ 1987 d = *(aa + ai[i]); /* diag[i] */ 1988 v = aa + ai[i] + 1; 1989 vj = aj + ai[i] + 1; 1990 nz = ai[i+1] - ai[i] - 1; 1991 x[i] = omega*t[i]/d; 1992 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 1993 PetscLogFlops(2*nz-1); 1994 } 1995 } 1996 1997 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1998 for (i=0; i<m; i++) 1999 t[i] = b[i]; 2000 2001 for (i=0; i<m-1; i++){ /* update rhs */ 2002 v = aa + ai[i] + 1; 2003 vj = aj + ai[i] + 1; 2004 nz = ai[i+1] - ai[i] - 1; 2005 while (nz--) t[*vj++] -= x[i]*(*v++); 2006 PetscLogFlops(2*nz-1); 2007 } 2008 for (i=m-1; i>=0; i--){ 2009 d = *(aa + ai[i]); 2010 v = aa + ai[i] + 1; 2011 vj = aj + ai[i] + 1; 2012 nz = ai[i+1] - ai[i] - 1; 2013 sum = t[i]; 2014 while (nz--) sum -= x[*vj++]*(*v++); 2015 PetscLogFlops(2*nz-1); 2016 x[i] = (1-omega)*x[i] + omega*sum/d; 2017 } 2018 } 2019 its--; 2020 } 2021 2022 while (its--) { 2023 /* 2024 forward sweep: 2025 for i=0,...,m-1: 2026 sum[i] = (b[i] - U(i,:)x )/d[i]; 2027 x[i] = (1-omega)x[i] + omega*sum[i]; 2028 b = b - x[i]*U^T(i,:); 2029 2030 */ 2031 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2032 for (i=0; i<m; i++) 2033 t[i] = b[i]; 2034 2035 for (i=0; i<m; i++){ 2036 d = *(aa + ai[i]); /* diag[i] */ 2037 v = aa + ai[i] + 1; v1=v; 2038 vj = aj + ai[i] + 1; vj1=vj; 2039 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2040 sum = t[i]; 2041 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2042 x[i] = (1-omega)*x[i] + omega*sum/d; 2043 while (nz--) t[*vj++] -= x[i]*(*v++); 2044 PetscLogFlops(4*nz-2); 2045 } 2046 } 2047 2048 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2049 /* 2050 backward sweep: 2051 b = b - x[i]*U^T(i,:), i=0,...,n-2 2052 for i=m-1,...,0: 2053 sum[i] = (b[i] - U(i,:)x )/d[i]; 2054 x[i] = (1-omega)x[i] + omega*sum[i]; 2055 */ 2056 for (i=0; i<m; i++) 2057 t[i] = b[i]; 2058 2059 for (i=0; i<m-1; i++){ /* update rhs */ 2060 v = aa + ai[i] + 1; 2061 vj = aj + ai[i] + 1; 2062 nz = ai[i+1] - ai[i] - 1; 2063 while (nz--) t[*vj++] -= x[i]*(*v++); 2064 PetscLogFlops(2*nz-1); 2065 } 2066 for (i=m-1; i>=0; i--){ 2067 d = *(aa + ai[i]); 2068 v = aa + ai[i] + 1; 2069 vj = aj + ai[i] + 1; 2070 nz = ai[i+1] - ai[i] - 1; 2071 sum = t[i]; 2072 while (nz--) sum -= x[*vj++]*(*v++); 2073 PetscLogFlops(2*nz-1); 2074 x[i] = (1-omega)*x[i] + omega*sum/d; 2075 } 2076 } 2077 } 2078 2079 ierr = PetscFree(t);CHKERRQ(ierr); 2080 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2081 if (bb != xx) { 2082 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2083 } 2084 2085 PetscFunctionReturn(0); 2086 } 2087 2088 2089 2090 2091 2092 2093