1 2 /* 3 Defines the basic matrix operations for the SBAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 7 #include "src/inline/spops.h" 8 #include "src/mat/impls/sbaij/seq/sbaij.h" 9 10 #define CHUNKSIZE 10 11 12 /* 13 Checks for missing diagonals 14 */ 15 #undef __FUNCT__ 16 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 17 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A) 18 { 19 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 20 PetscErrorCode ierr; 21 PetscInt *diag,*jj = a->j,i; 22 23 PetscFunctionBegin; 24 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 25 diag = a->diag; 26 for (i=0; i<a->mbs; i++) { 27 if (jj[diag[i]] != i) SETERRQ1(PETSC_ERR_ARG_CORRUPT,"Matrix is missing diagonal number %D",i); 28 } 29 PetscFunctionReturn(0); 30 } 31 32 #undef __FUNCT__ 33 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 34 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A) 35 { 36 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 37 PetscErrorCode ierr; 38 PetscInt i,mbs = a->mbs; 39 40 PetscFunctionBegin; 41 if (a->diag) PetscFunctionReturn(0); 42 43 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 44 PetscLogObjectMemory(A,(mbs+1)*sizeof(PetscInt)); 45 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 46 PetscFunctionReturn(0); 47 } 48 49 #undef __FUNCT__ 50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 51 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 52 { 53 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 54 PetscInt n = a->mbs,i; 55 56 PetscFunctionBegin; 57 *nn = n; 58 if (!ia) PetscFunctionReturn(0); 59 60 if (oshift == 1) { 61 /* temporarily add 1 to i and j indices */ 62 PetscInt nz = a->i[n]; 63 for (i=0; i<nz; i++) a->j[i]++; 64 for (i=0; i<n+1; i++) a->i[i]++; 65 *ia = a->i; *ja = a->j; 66 } else { 67 *ia = a->i; *ja = a->j; 68 } 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 74 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 75 { 76 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 77 PetscInt i,n = a->mbs; 78 79 PetscFunctionBegin; 80 if (!ia) PetscFunctionReturn(0); 81 82 if (oshift == 1) { 83 PetscInt nz = a->i[n]-1; 84 for (i=0; i<nz; i++) a->j[i]--; 85 for (i=0; i<n+1; i++) a->i[i]--; 86 } 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 92 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A) 93 { 94 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 95 PetscErrorCode ierr; 96 97 PetscFunctionBegin; 98 PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->m,a->nz); 99 ierr = PetscFree(a->a);CHKERRQ(ierr); 100 if (!a->singlemalloc) { 101 ierr = PetscFree(a->i);CHKERRQ(ierr); 102 ierr = PetscFree(a->j);CHKERRQ(ierr); 103 } 104 if (a->row) { 105 ierr = ISDestroy(a->row);CHKERRQ(ierr); 106 } 107 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 108 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 109 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 110 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 111 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 112 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 113 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 114 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 115 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 116 117 if (a->inew){ 118 ierr = PetscFree(a->inew);CHKERRQ(ierr); 119 a->inew = 0; 120 } 121 ierr = PetscFree(a);CHKERRQ(ierr); 122 123 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 124 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 125 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 126 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 127 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr); 128 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 129 PetscFunctionReturn(0); 130 } 131 132 #undef __FUNCT__ 133 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 134 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op) 135 { 136 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 137 138 PetscFunctionBegin; 139 switch (op) { 140 case MAT_ROW_ORIENTED: 141 a->roworiented = PETSC_TRUE; 142 break; 143 case MAT_COLUMN_ORIENTED: 144 a->roworiented = PETSC_FALSE; 145 break; 146 case MAT_COLUMNS_SORTED: 147 a->sorted = PETSC_TRUE; 148 break; 149 case MAT_COLUMNS_UNSORTED: 150 a->sorted = PETSC_FALSE; 151 break; 152 case MAT_KEEP_ZEROED_ROWS: 153 a->keepzeroedrows = PETSC_TRUE; 154 break; 155 case MAT_NO_NEW_NONZERO_LOCATIONS: 156 a->nonew = 1; 157 break; 158 case MAT_NEW_NONZERO_LOCATION_ERR: 159 a->nonew = -1; 160 break; 161 case MAT_NEW_NONZERO_ALLOCATION_ERR: 162 a->nonew = -2; 163 break; 164 case MAT_YES_NEW_NONZERO_LOCATIONS: 165 a->nonew = 0; 166 break; 167 case MAT_ROWS_SORTED: 168 case MAT_ROWS_UNSORTED: 169 case MAT_YES_NEW_DIAGONALS: 170 case MAT_IGNORE_OFF_PROC_ENTRIES: 171 case MAT_USE_HASH_TABLE: 172 PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n"); 173 break; 174 case MAT_NO_NEW_DIAGONALS: 175 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 176 case MAT_NOT_SYMMETRIC: 177 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 178 case MAT_HERMITIAN: 179 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 180 case MAT_SYMMETRIC: 181 case MAT_STRUCTURALLY_SYMMETRIC: 182 case MAT_NOT_HERMITIAN: 183 case MAT_SYMMETRY_ETERNAL: 184 case MAT_NOT_SYMMETRY_ETERNAL: 185 break; 186 default: 187 SETERRQ(PETSC_ERR_SUP,"unknown option"); 188 } 189 PetscFunctionReturn(0); 190 } 191 192 #undef __FUNCT__ 193 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 194 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v) 195 { 196 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 197 PetscErrorCode ierr; 198 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2; 199 MatScalar *aa,*aa_i; 200 PetscScalar *v_i; 201 202 PetscFunctionBegin; 203 bs = A->bs; 204 ai = a->i; 205 aj = a->j; 206 aa = a->a; 207 bs2 = a->bs2; 208 209 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row); 210 211 bn = row/bs; /* Block number */ 212 bp = row % bs; /* Block position */ 213 M = ai[bn+1] - ai[bn]; 214 *ncols = bs*M; 215 216 if (v) { 217 *v = 0; 218 if (*ncols) { 219 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 220 for (i=0; i<M; i++) { /* for each block in the block row */ 221 v_i = *v + i*bs; 222 aa_i = aa + bs2*(ai[bn] + i); 223 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 224 } 225 } 226 } 227 228 if (cols) { 229 *cols = 0; 230 if (*ncols) { 231 ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr); 232 for (i=0; i<M; i++) { /* for each block in the block row */ 233 cols_i = *cols + i*bs; 234 itmp = bs*aj[ai[bn] + i]; 235 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 236 } 237 } 238 } 239 240 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 241 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 242 #ifdef column_search 243 v_i = *v + M*bs; 244 cols_i = *cols + M*bs; 245 for (i=0; i<bn; i++){ /* for each block row */ 246 M = ai[i+1] - ai[i]; 247 for (j=0; j<M; j++){ 248 itmp = aj[ai[i] + j]; /* block column value */ 249 if (itmp == bn){ 250 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 251 for (k=0; k<bs; k++) { 252 *cols_i++ = i*bs+k; 253 *v_i++ = aa_i[k]; 254 } 255 *ncols += bs; 256 break; 257 } 258 } 259 } 260 #endif 261 262 PetscFunctionReturn(0); 263 } 264 265 #undef __FUNCT__ 266 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 267 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 268 { 269 PetscErrorCode ierr; 270 271 PetscFunctionBegin; 272 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 273 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 274 PetscFunctionReturn(0); 275 } 276 277 #undef __FUNCT__ 278 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 279 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B) 280 { 281 PetscErrorCode ierr; 282 PetscFunctionBegin; 283 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 284 PetscFunctionReturn(0); 285 } 286 287 #undef __FUNCT__ 288 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 289 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 290 { 291 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 292 PetscErrorCode ierr; 293 PetscInt i,j,bs = A->bs,k,l,bs2=a->bs2; 294 char *name; 295 PetscViewerFormat format; 296 297 PetscFunctionBegin; 298 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 299 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 300 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 301 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 302 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 303 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 304 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 305 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 306 for (i=0; i<a->mbs; i++) { 307 for (j=0; j<bs; j++) { 308 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 309 for (k=a->i[i]; k<a->i[i+1]; k++) { 310 for (l=0; l<bs; l++) { 311 #if defined(PETSC_USE_COMPLEX) 312 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 313 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 314 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 315 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 316 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 317 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 318 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 319 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 320 } 321 #else 322 if (a->a[bs2*k + l*bs + j] != 0.0) { 323 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 324 } 325 #endif 326 } 327 } 328 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 329 } 330 } 331 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 332 } else { 333 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 334 for (i=0; i<a->mbs; i++) { 335 for (j=0; j<bs; j++) { 336 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 337 for (k=a->i[i]; k<a->i[i+1]; k++) { 338 for (l=0; l<bs; l++) { 339 #if defined(PETSC_USE_COMPLEX) 340 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 341 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 342 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 343 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 344 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 345 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 346 } else { 347 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 348 } 349 #else 350 ierr = PetscViewerASCIIPrintf(viewer," %D %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 351 #endif 352 } 353 } 354 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 355 } 356 } 357 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 358 } 359 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 360 PetscFunctionReturn(0); 361 } 362 363 #undef __FUNCT__ 364 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 365 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 366 { 367 Mat A = (Mat) Aa; 368 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 369 PetscErrorCode ierr; 370 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2; 371 PetscMPIInt rank; 372 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 373 MatScalar *aa; 374 MPI_Comm comm; 375 PetscViewer viewer; 376 377 PetscFunctionBegin; 378 /* 379 This is nasty. If this is called from an originally parallel matrix 380 then all processes call this,but only the first has the matrix so the 381 rest should return immediately. 382 */ 383 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 384 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 385 if (rank) PetscFunctionReturn(0); 386 387 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 388 389 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 390 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 391 392 /* loop over matrix elements drawing boxes */ 393 color = PETSC_DRAW_BLUE; 394 for (i=0,row=0; i<mbs; i++,row+=bs) { 395 for (j=a->i[i]; j<a->i[i+1]; j++) { 396 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 397 x_l = a->j[j]*bs; x_r = x_l + 1.0; 398 aa = a->a + j*bs2; 399 for (k=0; k<bs; k++) { 400 for (l=0; l<bs; l++) { 401 if (PetscRealPart(*aa++) >= 0.) continue; 402 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 403 } 404 } 405 } 406 } 407 color = PETSC_DRAW_CYAN; 408 for (i=0,row=0; i<mbs; i++,row+=bs) { 409 for (j=a->i[i]; j<a->i[i+1]; j++) { 410 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 411 x_l = a->j[j]*bs; x_r = x_l + 1.0; 412 aa = a->a + j*bs2; 413 for (k=0; k<bs; k++) { 414 for (l=0; l<bs; l++) { 415 if (PetscRealPart(*aa++) != 0.) continue; 416 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 417 } 418 } 419 } 420 } 421 422 color = PETSC_DRAW_RED; 423 for (i=0,row=0; i<mbs; i++,row+=bs) { 424 for (j=a->i[i]; j<a->i[i+1]; j++) { 425 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 426 x_l = a->j[j]*bs; x_r = x_l + 1.0; 427 aa = a->a + j*bs2; 428 for (k=0; k<bs; k++) { 429 for (l=0; l<bs; l++) { 430 if (PetscRealPart(*aa++) <= 0.) continue; 431 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 432 } 433 } 434 } 435 } 436 PetscFunctionReturn(0); 437 } 438 439 #undef __FUNCT__ 440 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 441 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 442 { 443 PetscErrorCode ierr; 444 PetscReal xl,yl,xr,yr,w,h; 445 PetscDraw draw; 446 PetscTruth isnull; 447 448 PetscFunctionBegin; 449 450 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 451 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 452 453 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 454 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 455 xr += w; yr += h; xl = -w; yl = -h; 456 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 457 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 458 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 459 PetscFunctionReturn(0); 460 } 461 462 #undef __FUNCT__ 463 #define __FUNCT__ "MatView_SeqSBAIJ" 464 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 465 { 466 PetscErrorCode ierr; 467 PetscTruth iascii,isdraw; 468 469 PetscFunctionBegin; 470 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 471 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 472 if (iascii){ 473 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 474 } else if (isdraw) { 475 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 476 } else { 477 Mat B; 478 ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr); 479 ierr = MatView(B,viewer);CHKERRQ(ierr); 480 ierr = MatDestroy(B);CHKERRQ(ierr); 481 } 482 PetscFunctionReturn(0); 483 } 484 485 486 #undef __FUNCT__ 487 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 488 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 489 { 490 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 491 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 492 PetscInt *ai = a->i,*ailen = a->ilen; 493 PetscInt brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2; 494 MatScalar *ap,*aa = a->a,zero = 0.0; 495 496 PetscFunctionBegin; 497 for (k=0; k<m; k++) { /* loop over rows */ 498 row = im[k]; brow = row/bs; 499 if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row); 500 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 501 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 502 nrow = ailen[brow]; 503 for (l=0; l<n; l++) { /* loop over columns */ 504 if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]); 505 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1); 506 col = in[l] ; 507 bcol = col/bs; 508 cidx = col%bs; 509 ridx = row%bs; 510 high = nrow; 511 low = 0; /* assume unsorted */ 512 while (high-low > 5) { 513 t = (low+high)/2; 514 if (rp[t] > bcol) high = t; 515 else low = t; 516 } 517 for (i=low; i<high; i++) { 518 if (rp[i] > bcol) break; 519 if (rp[i] == bcol) { 520 *v++ = ap[bs2*i+bs*cidx+ridx]; 521 goto finished; 522 } 523 } 524 *v++ = zero; 525 finished:; 526 } 527 } 528 PetscFunctionReturn(0); 529 } 530 531 532 #undef __FUNCT__ 533 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 534 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 535 { 536 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 537 PetscErrorCode ierr; 538 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 539 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 540 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval; 541 PetscTruth roworiented=a->roworiented; 542 const MatScalar *value = v; 543 MatScalar *ap,*aa = a->a,*bap; 544 545 PetscFunctionBegin; 546 if (roworiented) { 547 stepval = (n-1)*bs; 548 } else { 549 stepval = (m-1)*bs; 550 } 551 for (k=0; k<m; k++) { /* loop over added rows */ 552 row = im[k]; 553 if (row < 0) continue; 554 #if defined(PETSC_USE_DEBUG) 555 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 556 #endif 557 rp = aj + ai[row]; 558 ap = aa + bs2*ai[row]; 559 rmax = imax[row]; 560 nrow = ailen[row]; 561 low = 0; 562 for (l=0; l<n; l++) { /* loop over added columns */ 563 if (in[l] < 0) continue; 564 col = in[l]; 565 #if defined(PETSC_USE_DEBUG) 566 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1); 567 #endif 568 if (col < row) continue; /* ignore lower triangular block */ 569 if (roworiented) { 570 value = v + k*(stepval+bs)*bs + l*bs; 571 } else { 572 value = v + l*(stepval+bs)*bs + k*bs; 573 } 574 if (!sorted) low = 0; high = nrow; 575 while (high-low > 7) { 576 t = (low+high)/2; 577 if (rp[t] > col) high = t; 578 else low = t; 579 } 580 for (i=low; i<high; i++) { 581 if (rp[i] > col) break; 582 if (rp[i] == col) { 583 bap = ap + bs2*i; 584 if (roworiented) { 585 if (is == ADD_VALUES) { 586 for (ii=0; ii<bs; ii++,value+=stepval) { 587 for (jj=ii; jj<bs2; jj+=bs) { 588 bap[jj] += *value++; 589 } 590 } 591 } else { 592 for (ii=0; ii<bs; ii++,value+=stepval) { 593 for (jj=ii; jj<bs2; jj+=bs) { 594 bap[jj] = *value++; 595 } 596 } 597 } 598 } else { 599 if (is == ADD_VALUES) { 600 for (ii=0; ii<bs; ii++,value+=stepval) { 601 for (jj=0; jj<bs; jj++) { 602 *bap++ += *value++; 603 } 604 } 605 } else { 606 for (ii=0; ii<bs; ii++,value+=stepval) { 607 for (jj=0; jj<bs; jj++) { 608 *bap++ = *value++; 609 } 610 } 611 } 612 } 613 goto noinsert2; 614 } 615 } 616 if (nonew == 1) goto noinsert2; 617 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 618 if (nrow >= rmax) { 619 /* there is no extra room in row, therefore enlarge */ 620 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 621 MatScalar *new_a; 622 623 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 624 625 /* malloc new storage space */ 626 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); 627 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 628 new_j = (PetscInt*)(new_a + bs2*new_nz); 629 new_i = new_j + new_nz; 630 631 /* copy over old data into new slots */ 632 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 633 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 634 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 635 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 636 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 637 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 638 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 639 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 640 /* free up old matrix storage */ 641 ierr = PetscFree(a->a);CHKERRQ(ierr); 642 if (!a->singlemalloc) { 643 ierr = PetscFree(a->i);CHKERRQ(ierr); 644 ierr = PetscFree(a->j);CHKERRQ(ierr); 645 } 646 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 647 a->singlemalloc = PETSC_TRUE; 648 649 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 650 rmax = imax[row] = imax[row] + CHUNKSIZE; 651 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); 652 a->maxnz += bs2*CHUNKSIZE; 653 a->reallocs++; 654 a->nz++; 655 } 656 N = nrow++ - 1; 657 /* shift up all the later entries in this row */ 658 for (ii=N; ii>=i; ii--) { 659 rp[ii+1] = rp[ii]; 660 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 661 } 662 if (N >= i) { 663 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 664 } 665 rp[i] = col; 666 bap = ap + bs2*i; 667 if (roworiented) { 668 for (ii=0; ii<bs; ii++,value+=stepval) { 669 for (jj=ii; jj<bs2; jj+=bs) { 670 bap[jj] = *value++; 671 } 672 } 673 } else { 674 for (ii=0; ii<bs; ii++,value+=stepval) { 675 for (jj=0; jj<bs; jj++) { 676 *bap++ = *value++; 677 } 678 } 679 } 680 noinsert2:; 681 low = i; 682 } 683 ailen[row] = nrow; 684 } 685 PetscFunctionReturn(0); 686 } 687 688 #undef __FUNCT__ 689 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 690 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 691 { 692 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 693 PetscErrorCode ierr; 694 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 695 PetscInt m = A->m,*ip,N,*ailen = a->ilen; 696 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 697 MatScalar *aa = a->a,*ap; 698 699 PetscFunctionBegin; 700 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 701 702 if (m) rmax = ailen[0]; 703 for (i=1; i<mbs; i++) { 704 /* move each row back by the amount of empty slots (fshift) before it*/ 705 fshift += imax[i-1] - ailen[i-1]; 706 rmax = PetscMax(rmax,ailen[i]); 707 if (fshift) { 708 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 709 N = ailen[i]; 710 for (j=0; j<N; j++) { 711 ip[j-fshift] = ip[j]; 712 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 713 } 714 } 715 ai[i] = ai[i-1] + ailen[i-1]; 716 } 717 if (mbs) { 718 fshift += imax[mbs-1] - ailen[mbs-1]; 719 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 720 } 721 /* reset ilen and imax for each row */ 722 for (i=0; i<mbs; i++) { 723 ailen[i] = imax[i] = ai[i+1] - ai[i]; 724 } 725 a->nz = ai[mbs]; 726 727 /* diagonals may have moved, reset it */ 728 if (a->diag) { 729 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 730 } 731 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n", 732 m,A->m,A->bs,fshift*bs2,a->nz*bs2); 733 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %D\n", 734 a->reallocs); 735 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %D\n",rmax); 736 a->reallocs = 0; 737 A->info.nz_unneeded = (PetscReal)fshift*bs2; 738 739 PetscFunctionReturn(0); 740 } 741 742 /* 743 This function returns an array of flags which indicate the locations of contiguous 744 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 745 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 746 Assume: sizes should be long enough to hold all the values. 747 */ 748 #undef __FUNCT__ 749 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 750 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 751 { 752 PetscInt i,j,k,row; 753 PetscTruth flg; 754 755 PetscFunctionBegin; 756 for (i=0,j=0; i<n; j++) { 757 row = idx[i]; 758 if (row%bs!=0) { /* Not the begining of a block */ 759 sizes[j] = 1; 760 i++; 761 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 762 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 763 i++; 764 } else { /* Begining of the block, so check if the complete block exists */ 765 flg = PETSC_TRUE; 766 for (k=1; k<bs; k++) { 767 if (row+k != idx[i+k]) { /* break in the block */ 768 flg = PETSC_FALSE; 769 break; 770 } 771 } 772 if (flg) { /* No break in the bs */ 773 sizes[j] = bs; 774 i+= bs; 775 } else { 776 sizes[j] = 1; 777 i++; 778 } 779 } 780 } 781 *bs_max = j; 782 PetscFunctionReturn(0); 783 } 784 785 786 /* Only add/insert a(i,j) with i<=j (blocks). 787 Any a(i,j) with i>j input by user is ingored. 788 */ 789 790 #undef __FUNCT__ 791 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 792 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 793 { 794 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 795 PetscErrorCode ierr; 796 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 797 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 798 PetscInt *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol; 799 PetscInt ridx,cidx,bs2=a->bs2; 800 MatScalar *ap,value,*aa=a->a,*bap; 801 802 PetscFunctionBegin; 803 804 for (k=0; k<m; k++) { /* loop over added rows */ 805 row = im[k]; /* row number */ 806 brow = row/bs; /* block row number */ 807 if (row < 0) continue; 808 #if defined(PETSC_USE_DEBUG) 809 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 810 #endif 811 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 812 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 813 rmax = imax[brow]; /* maximum space allocated for this row */ 814 nrow = ailen[brow]; /* actual length of this row */ 815 low = 0; 816 817 for (l=0; l<n; l++) { /* loop over added columns */ 818 if (in[l] < 0) continue; 819 #if defined(PETSC_USE_DEBUG) 820 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->m-1); 821 #endif 822 col = in[l]; 823 bcol = col/bs; /* block col number */ 824 825 if (brow <= bcol){ 826 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 827 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 828 /* element value a(k,l) */ 829 if (roworiented) { 830 value = v[l + k*n]; 831 } else { 832 value = v[k + l*m]; 833 } 834 835 /* move pointer bap to a(k,l) quickly and add/insert value */ 836 if (!sorted) low = 0; high = nrow; 837 while (high-low > 7) { 838 t = (low+high)/2; 839 if (rp[t] > bcol) high = t; 840 else low = t; 841 } 842 for (i=low; i<high; i++) { 843 /* printf("The loop of i=low.., rp[%D]=%D\n",i,rp[i]); */ 844 if (rp[i] > bcol) break; 845 if (rp[i] == bcol) { 846 bap = ap + bs2*i + bs*cidx + ridx; 847 if (is == ADD_VALUES) *bap += value; 848 else *bap = value; 849 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 850 if (brow == bcol && ridx < cidx){ 851 bap = ap + bs2*i + bs*ridx + cidx; 852 if (is == ADD_VALUES) *bap += value; 853 else *bap = value; 854 } 855 goto noinsert1; 856 } 857 } 858 859 if (nonew == 1) goto noinsert1; 860 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 861 if (nrow >= rmax) { 862 /* there is no extra room in row, therefore enlarge */ 863 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 864 MatScalar *new_a; 865 866 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 867 868 /* Malloc new storage space */ 869 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); 870 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 871 new_j = (PetscInt*)(new_a + bs2*new_nz); 872 new_i = new_j + new_nz; 873 874 /* copy over old data into new slots */ 875 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 876 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 877 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 878 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 879 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 880 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 881 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 882 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 883 /* free up old matrix storage */ 884 ierr = PetscFree(a->a);CHKERRQ(ierr); 885 if (!a->singlemalloc) { 886 ierr = PetscFree(a->i);CHKERRQ(ierr); 887 ierr = PetscFree(a->j);CHKERRQ(ierr); 888 } 889 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 890 a->singlemalloc = PETSC_TRUE; 891 892 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 893 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 894 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); 895 a->maxnz += bs2*CHUNKSIZE; 896 a->reallocs++; 897 a->nz++; 898 } 899 900 N = nrow++ - 1; 901 /* shift up all the later entries in this row */ 902 for (ii=N; ii>=i; ii--) { 903 rp[ii+1] = rp[ii]; 904 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 905 } 906 if (N>=i) { 907 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 908 } 909 rp[i] = bcol; 910 ap[bs2*i + bs*cidx + ridx] = value; 911 noinsert1:; 912 low = i; 913 /* } */ 914 } 915 } /* end of if .. if.. */ 916 } /* end of loop over added columns */ 917 ailen[brow] = nrow; 918 } /* end of loop over added rows */ 919 920 PetscFunctionReturn(0); 921 } 922 923 EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*); 924 EXTERN PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*); 925 EXTERN PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat,PetscInt,IS[],PetscInt); 926 EXTERN PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,PetscInt,MatReuse,Mat*); 927 EXTERN PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat*[]); 928 EXTERN PetscErrorCode MatScale_SeqSBAIJ(const PetscScalar*,Mat); 929 EXTERN PetscErrorCode MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 930 EXTERN PetscErrorCode MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 931 EXTERN PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat,Vec); 932 EXTERN PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 933 EXTERN PetscErrorCode MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 934 EXTERN PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat); 935 EXTERN PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat,Vec); 936 937 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 938 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 939 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 940 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 941 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 942 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 943 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 944 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 945 946 EXTERN PetscErrorCode MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs); 947 948 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 949 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 950 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 951 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 952 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 953 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 954 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 955 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 956 957 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,MatFactorInfo*,Mat*); 958 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,MatFactorInfo*,Mat*); 959 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,MatFactorInfo*,Mat*); 960 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,MatFactorInfo*,Mat*); 961 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,MatFactorInfo*,Mat*); 962 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,MatFactorInfo*,Mat*); 963 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,MatFactorInfo*,Mat*); 964 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,MatFactorInfo*,Mat*); 965 EXTERN PetscErrorCode MatGetInertia_SeqSBAIJ(Mat,PetscInt*,PetscInt*,PetscInt*); 966 967 EXTERN PetscErrorCode MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 968 EXTERN PetscErrorCode MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 969 EXTERN PetscErrorCode MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 970 EXTERN PetscErrorCode MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 971 EXTERN PetscErrorCode MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 972 EXTERN PetscErrorCode MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 973 EXTERN PetscErrorCode MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 974 EXTERN PetscErrorCode MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 975 976 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 977 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 978 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 979 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 980 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 981 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 982 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 983 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 984 985 #ifdef HAVE_MatICCFactor 986 /* modified from MatILUFactor_SeqSBAIJ, needs further work! */ 987 #undef __FUNCT__ 988 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 989 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 990 { 991 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 992 Mat outA; 993 PetscErrorCode ierr; 994 PetscTruth row_identity,col_identity; 995 996 PetscFunctionBegin; 997 outA = inA; 998 inA->factor = FACTOR_CHOLESKY; 999 1000 if (!a->diag) { 1001 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 1002 } 1003 /* 1004 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1005 for ILU(0) factorization with natural ordering 1006 */ 1007 switch (a->bs) { 1008 case 1: 1009 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1010 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1011 inA->ops->solves = MatSolves_SeqSBAIJ_1; 1012 PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1013 case 2: 1014 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1015 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1016 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1017 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1018 break; 1019 case 3: 1020 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1021 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1022 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1023 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1024 break; 1025 case 4: 1026 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1027 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1028 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1029 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1030 break; 1031 case 5: 1032 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1033 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1034 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1035 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1036 break; 1037 case 6: 1038 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1039 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1040 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1041 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1042 break; 1043 case 7: 1044 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1045 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1046 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1047 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1048 break; 1049 default: 1050 a->row = row; 1051 a->icol = col; 1052 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1053 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1054 1055 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1056 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1057 PetscLogObjectParent(inA,a->icol); 1058 1059 if (!a->solve_work) { 1060 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1061 PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar)); 1062 } 1063 } 1064 1065 ierr = MatCholeskyFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1066 1067 PetscFunctionReturn(0); 1068 } 1069 #endif 1070 1071 #undef __FUNCT__ 1072 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1073 PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat A) 1074 { 1075 static PetscTruth called = PETSC_FALSE; 1076 MPI_Comm comm = A->comm; 1077 PetscErrorCode ierr; 1078 1079 PetscFunctionBegin; 1080 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1081 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1082 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1083 PetscFunctionReturn(0); 1084 } 1085 1086 EXTERN_C_BEGIN 1087 #undef __FUNCT__ 1088 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1089 PetscErrorCode MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices) 1090 { 1091 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1092 PetscInt i,nz,n; 1093 1094 PetscFunctionBegin; 1095 nz = baij->maxnz; 1096 n = mat->n; 1097 for (i=0; i<nz; i++) { 1098 baij->j[i] = indices[i]; 1099 } 1100 baij->nz = nz; 1101 for (i=0; i<n; i++) { 1102 baij->ilen[i] = baij->imax[i]; 1103 } 1104 1105 PetscFunctionReturn(0); 1106 } 1107 EXTERN_C_END 1108 1109 #undef __FUNCT__ 1110 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1111 /*@ 1112 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1113 in the matrix. 1114 1115 Input Parameters: 1116 + mat - the SeqSBAIJ matrix 1117 - indices - the column indices 1118 1119 Level: advanced 1120 1121 Notes: 1122 This can be called if you have precomputed the nonzero structure of the 1123 matrix and want to provide it to the matrix object to improve the performance 1124 of the MatSetValues() operation. 1125 1126 You MUST have set the correct numbers of nonzeros per row in the call to 1127 MatCreateSeqSBAIJ(). 1128 1129 MUST be called before any calls to MatSetValues() 1130 1131 .seealso: MatCreateSeqSBAIJ 1132 @*/ 1133 PetscErrorCode MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1134 { 1135 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1136 1137 PetscFunctionBegin; 1138 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1139 PetscValidPointer(indices,2); 1140 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1141 if (f) { 1142 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1143 } else { 1144 SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices"); 1145 } 1146 PetscFunctionReturn(0); 1147 } 1148 1149 #undef __FUNCT__ 1150 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1151 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A) 1152 { 1153 PetscErrorCode ierr; 1154 1155 PetscFunctionBegin; 1156 ierr = MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1157 PetscFunctionReturn(0); 1158 } 1159 1160 #undef __FUNCT__ 1161 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1162 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1163 { 1164 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1165 PetscFunctionBegin; 1166 *array = a->a; 1167 PetscFunctionReturn(0); 1168 } 1169 1170 #undef __FUNCT__ 1171 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1172 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1173 { 1174 PetscFunctionBegin; 1175 PetscFunctionReturn(0); 1176 } 1177 1178 #include "petscblaslapack.h" 1179 #undef __FUNCT__ 1180 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1181 PetscErrorCode MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1182 { 1183 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1184 PetscErrorCode ierr; 1185 PetscInt i,bs=Y->bs,bs2,j; 1186 PetscBLASInt bnz = (PetscBLASInt)x->nz,one = 1; 1187 1188 PetscFunctionBegin; 1189 if (str == SAME_NONZERO_PATTERN) { 1190 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1191 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1192 if (y->xtoy && y->XtoY != X) { 1193 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1194 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1195 } 1196 if (!y->xtoy) { /* get xtoy */ 1197 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1198 y->XtoY = X; 1199 } 1200 bs2 = bs*bs; 1201 for (i=0; i<x->nz; i++) { 1202 j = 0; 1203 while (j < bs2){ 1204 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1205 j++; 1206 } 1207 } 1208 PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)); 1209 } else { 1210 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1211 } 1212 PetscFunctionReturn(0); 1213 } 1214 1215 #undef __FUNCT__ 1216 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1217 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1218 { 1219 PetscFunctionBegin; 1220 *flg = PETSC_TRUE; 1221 PetscFunctionReturn(0); 1222 } 1223 1224 #undef __FUNCT__ 1225 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1226 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1227 { 1228 PetscFunctionBegin; 1229 *flg = PETSC_TRUE; 1230 PetscFunctionReturn(0); 1231 } 1232 1233 #undef __FUNCT__ 1234 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1235 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg) 1236 { 1237 PetscFunctionBegin; 1238 *flg = PETSC_FALSE; 1239 PetscFunctionReturn(0); 1240 } 1241 1242 /* -------------------------------------------------------------------*/ 1243 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1244 MatGetRow_SeqSBAIJ, 1245 MatRestoreRow_SeqSBAIJ, 1246 MatMult_SeqSBAIJ_N, 1247 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1248 MatMult_SeqSBAIJ_N, 1249 MatMultAdd_SeqSBAIJ_N, 1250 MatSolve_SeqSBAIJ_N, 1251 0, 1252 0, 1253 /*10*/ 0, 1254 0, 1255 MatCholeskyFactor_SeqSBAIJ, 1256 MatRelax_SeqSBAIJ, 1257 MatTranspose_SeqSBAIJ, 1258 /*15*/ MatGetInfo_SeqSBAIJ, 1259 MatEqual_SeqSBAIJ, 1260 MatGetDiagonal_SeqSBAIJ, 1261 MatDiagonalScale_SeqSBAIJ, 1262 MatNorm_SeqSBAIJ, 1263 /*20*/ 0, 1264 MatAssemblyEnd_SeqSBAIJ, 1265 0, 1266 MatSetOption_SeqSBAIJ, 1267 MatZeroEntries_SeqSBAIJ, 1268 /*25*/ 0, 1269 0, 1270 0, 1271 MatCholeskyFactorSymbolic_SeqSBAIJ, 1272 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1273 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1274 0, 1275 MatICCFactorSymbolic_SeqSBAIJ, 1276 MatGetArray_SeqSBAIJ, 1277 MatRestoreArray_SeqSBAIJ, 1278 /*35*/ MatDuplicate_SeqSBAIJ, 1279 0, 1280 0, 1281 0, 1282 0, 1283 /*40*/ MatAXPY_SeqSBAIJ, 1284 MatGetSubMatrices_SeqSBAIJ, 1285 MatIncreaseOverlap_SeqSBAIJ, 1286 MatGetValues_SeqSBAIJ, 1287 0, 1288 /*45*/ MatPrintHelp_SeqSBAIJ, 1289 MatScale_SeqSBAIJ, 1290 0, 1291 0, 1292 0, 1293 /*50*/ 0, 1294 MatGetRowIJ_SeqSBAIJ, 1295 MatRestoreRowIJ_SeqSBAIJ, 1296 0, 1297 0, 1298 /*55*/ 0, 1299 0, 1300 0, 1301 0, 1302 MatSetValuesBlocked_SeqSBAIJ, 1303 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1304 0, 1305 0, 1306 MatGetPetscMaps_Petsc, 1307 0, 1308 /*65*/ 0, 1309 0, 1310 0, 1311 0, 1312 0, 1313 /*70*/ MatGetRowMax_SeqSBAIJ, 1314 0, 1315 0, 1316 0, 1317 0, 1318 /*75*/ 0, 1319 0, 1320 0, 1321 0, 1322 0, 1323 /*80*/ 0, 1324 0, 1325 0, 1326 #if !defined(PETSC_USE_COMPLEX) 1327 MatGetInertia_SeqSBAIJ, 1328 #else 1329 0, 1330 #endif 1331 MatLoad_SeqSBAIJ, 1332 /*85*/ MatIsSymmetric_SeqSBAIJ, 1333 MatIsHermitian_SeqSBAIJ, 1334 MatIsStructurallySymmetric_SeqSBAIJ, 1335 0, 1336 0, 1337 /*90*/ 0, 1338 0, 1339 0, 1340 0, 1341 0, 1342 /*95*/ 0, 1343 0, 1344 0, 1345 0}; 1346 1347 EXTERN_C_BEGIN 1348 #undef __FUNCT__ 1349 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1350 PetscErrorCode MatStoreValues_SeqSBAIJ(Mat mat) 1351 { 1352 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1353 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 1354 PetscErrorCode ierr; 1355 1356 PetscFunctionBegin; 1357 if (aij->nonew != 1) { 1358 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1359 } 1360 1361 /* allocate space for values if not already there */ 1362 if (!aij->saved_values) { 1363 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1364 } 1365 1366 /* copy values over */ 1367 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1368 PetscFunctionReturn(0); 1369 } 1370 EXTERN_C_END 1371 1372 EXTERN_C_BEGIN 1373 #undef __FUNCT__ 1374 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1375 PetscErrorCode MatRetrieveValues_SeqSBAIJ(Mat mat) 1376 { 1377 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1378 PetscErrorCode ierr; 1379 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 1380 1381 PetscFunctionBegin; 1382 if (aij->nonew != 1) { 1383 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1384 } 1385 if (!aij->saved_values) { 1386 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1387 } 1388 1389 /* copy values over */ 1390 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1391 PetscFunctionReturn(0); 1392 } 1393 EXTERN_C_END 1394 1395 EXTERN_C_BEGIN 1396 #undef __FUNCT__ 1397 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1398 PetscErrorCode MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 1399 { 1400 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1401 PetscErrorCode ierr; 1402 PetscInt i,len,mbs,bs2; 1403 PetscTruth flg; 1404 1405 PetscFunctionBegin; 1406 B->preallocated = PETSC_TRUE; 1407 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1408 mbs = B->m/bs; 1409 bs2 = bs*bs; 1410 1411 if (mbs*bs != B->m) { 1412 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1413 } 1414 1415 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1416 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 1417 if (nnz) { 1418 for (i=0; i<mbs; i++) { 1419 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 1420 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs); 1421 } 1422 } 1423 1424 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1425 if (!flg) { 1426 switch (bs) { 1427 case 1: 1428 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1429 B->ops->solve = MatSolve_SeqSBAIJ_1; 1430 B->ops->solves = MatSolves_SeqSBAIJ_1; 1431 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1432 B->ops->mult = MatMult_SeqSBAIJ_1; 1433 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1434 break; 1435 case 2: 1436 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1437 B->ops->solve = MatSolve_SeqSBAIJ_2; 1438 B->ops->solvetranspose = MatSolve_SeqSBAIJ_2; 1439 B->ops->mult = MatMult_SeqSBAIJ_2; 1440 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1441 break; 1442 case 3: 1443 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1444 B->ops->solve = MatSolve_SeqSBAIJ_3; 1445 B->ops->solvetranspose = MatSolve_SeqSBAIJ_3; 1446 B->ops->mult = MatMult_SeqSBAIJ_3; 1447 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1448 break; 1449 case 4: 1450 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1451 B->ops->solve = MatSolve_SeqSBAIJ_4; 1452 B->ops->solvetranspose = MatSolve_SeqSBAIJ_4; 1453 B->ops->mult = MatMult_SeqSBAIJ_4; 1454 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1455 break; 1456 case 5: 1457 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1458 B->ops->solve = MatSolve_SeqSBAIJ_5; 1459 B->ops->solvetranspose = MatSolve_SeqSBAIJ_5; 1460 B->ops->mult = MatMult_SeqSBAIJ_5; 1461 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1462 break; 1463 case 6: 1464 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1465 B->ops->solve = MatSolve_SeqSBAIJ_6; 1466 B->ops->solvetranspose = MatSolve_SeqSBAIJ_6; 1467 B->ops->mult = MatMult_SeqSBAIJ_6; 1468 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1469 break; 1470 case 7: 1471 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1472 B->ops->solve = MatSolve_SeqSBAIJ_7; 1473 B->ops->solvetranspose = MatSolve_SeqSBAIJ_7; 1474 B->ops->mult = MatMult_SeqSBAIJ_7; 1475 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1476 break; 1477 } 1478 } 1479 1480 b->mbs = mbs; 1481 b->nbs = mbs; 1482 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->imax);CHKERRQ(ierr); 1483 if (!nnz) { 1484 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1485 else if (nz <= 0) nz = 1; 1486 for (i=0; i<mbs; i++) { 1487 b->imax[i] = nz; 1488 } 1489 nz = nz*mbs; /* total nz */ 1490 } else { 1491 nz = 0; 1492 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1493 } 1494 /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1495 1496 /* allocate the matrix space */ 1497 len = nz*sizeof(PetscInt) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(PetscInt); 1498 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1499 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1500 b->j = (PetscInt*)(b->a + nz*bs2); 1501 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1502 b->i = b->j + nz; 1503 b->singlemalloc = PETSC_TRUE; 1504 1505 /* pointer to beginning of each row */ 1506 b->i[0] = 0; 1507 for (i=1; i<mbs+1; i++) { 1508 b->i[i] = b->i[i-1] + b->imax[i-1]; 1509 } 1510 1511 /* b->ilen will count nonzeros in each block row so far. */ 1512 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->ilen);CHKERRQ(ierr); 1513 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1514 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1515 1516 B->bs = bs; 1517 b->bs2 = bs2; 1518 b->nz = 0; 1519 b->maxnz = nz*bs2; 1520 1521 b->inew = 0; 1522 b->jnew = 0; 1523 b->anew = 0; 1524 b->a2anew = 0; 1525 b->permute = PETSC_FALSE; 1526 PetscFunctionReturn(0); 1527 } 1528 EXTERN_C_END 1529 1530 EXTERN_C_BEGIN 1531 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqAIJ(Mat,const MatType,Mat*); 1532 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqBAIJ(Mat,const MatType,Mat*); 1533 EXTERN_C_END 1534 1535 /*MC 1536 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1537 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1538 1539 Options Database Keys: 1540 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1541 1542 Level: beginner 1543 1544 .seealso: MatCreateSeqSBAIJ 1545 M*/ 1546 1547 EXTERN_C_BEGIN 1548 #undef __FUNCT__ 1549 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1550 PetscErrorCode MatCreate_SeqSBAIJ(Mat B) 1551 { 1552 Mat_SeqSBAIJ *b; 1553 PetscErrorCode ierr; 1554 PetscMPIInt size; 1555 1556 PetscFunctionBegin; 1557 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1558 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1559 B->m = B->M = PetscMax(B->m,B->M); 1560 B->n = B->N = PetscMax(B->n,B->N); 1561 1562 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1563 B->data = (void*)b; 1564 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1565 B->ops->destroy = MatDestroy_SeqSBAIJ; 1566 B->ops->view = MatView_SeqSBAIJ; 1567 B->factor = 0; 1568 B->lupivotthreshold = 1.0; 1569 B->mapping = 0; 1570 b->row = 0; 1571 b->icol = 0; 1572 b->reallocs = 0; 1573 b->saved_values = 0; 1574 1575 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1576 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1577 1578 b->sorted = PETSC_FALSE; 1579 b->roworiented = PETSC_TRUE; 1580 b->nonew = 0; 1581 b->diag = 0; 1582 b->solve_work = 0; 1583 b->mult_work = 0; 1584 B->spptr = 0; 1585 b->keepzeroedrows = PETSC_FALSE; 1586 b->xtoy = 0; 1587 b->XtoY = 0; 1588 1589 b->inew = 0; 1590 b->jnew = 0; 1591 b->anew = 0; 1592 b->a2anew = 0; 1593 b->permute = PETSC_FALSE; 1594 1595 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1596 "MatStoreValues_SeqSBAIJ", 1597 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1598 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1599 "MatRetrieveValues_SeqSBAIJ", 1600 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1601 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1602 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1603 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1604 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C", 1605 "MatConvert_SeqSBAIJ_SeqAIJ", 1606 MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr); 1607 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C", 1608 "MatConvert_SeqSBAIJ_SeqBAIJ", 1609 MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr); 1610 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1611 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1612 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1613 1614 B->symmetric = PETSC_TRUE; 1615 B->structurally_symmetric = PETSC_TRUE; 1616 B->symmetric_set = PETSC_TRUE; 1617 B->structurally_symmetric_set = PETSC_TRUE; 1618 PetscFunctionReturn(0); 1619 } 1620 EXTERN_C_END 1621 1622 #undef __FUNCT__ 1623 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1624 /*@C 1625 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1626 compressed row) format. For good matrix assembly performance the 1627 user should preallocate the matrix storage by setting the parameter nz 1628 (or the array nnz). By setting these parameters accurately, performance 1629 during matrix assembly can be increased by more than a factor of 50. 1630 1631 Collective on Mat 1632 1633 Input Parameters: 1634 + A - the symmetric matrix 1635 . bs - size of block 1636 . nz - number of block nonzeros per block row (same for all rows) 1637 - nnz - array containing the number of block nonzeros in the upper triangular plus 1638 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1639 1640 Options Database Keys: 1641 . -mat_no_unroll - uses code that does not unroll the loops in the 1642 block calculations (much slower) 1643 . -mat_block_size - size of the blocks to use 1644 1645 Level: intermediate 1646 1647 Notes: 1648 Specify the preallocated storage with either nz or nnz (not both). 1649 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1650 allocation. For additional details, see the users manual chapter on 1651 matrices. 1652 1653 If the nnz parameter is given then the nz parameter is ignored 1654 1655 1656 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1657 @*/ 1658 PetscErrorCode MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 1659 { 1660 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 1661 1662 PetscFunctionBegin; 1663 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1664 if (f) { 1665 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1666 } 1667 PetscFunctionReturn(0); 1668 } 1669 1670 #undef __FUNCT__ 1671 #define __FUNCT__ "MatCreateSeqSBAIJ" 1672 /*@C 1673 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1674 compressed row) format. For good matrix assembly performance the 1675 user should preallocate the matrix storage by setting the parameter nz 1676 (or the array nnz). By setting these parameters accurately, performance 1677 during matrix assembly can be increased by more than a factor of 50. 1678 1679 Collective on MPI_Comm 1680 1681 Input Parameters: 1682 + comm - MPI communicator, set to PETSC_COMM_SELF 1683 . bs - size of block 1684 . m - number of rows, or number of columns 1685 . nz - number of block nonzeros per block row (same for all rows) 1686 - nnz - array containing the number of block nonzeros in the upper triangular plus 1687 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1688 1689 Output Parameter: 1690 . A - the symmetric matrix 1691 1692 Options Database Keys: 1693 . -mat_no_unroll - uses code that does not unroll the loops in the 1694 block calculations (much slower) 1695 . -mat_block_size - size of the blocks to use 1696 1697 Level: intermediate 1698 1699 Notes: 1700 1701 Specify the preallocated storage with either nz or nnz (not both). 1702 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1703 allocation. For additional details, see the users manual chapter on 1704 matrices. 1705 1706 If the nnz parameter is given then the nz parameter is ignored 1707 1708 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1709 @*/ 1710 PetscErrorCode MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1711 { 1712 PetscErrorCode ierr; 1713 1714 PetscFunctionBegin; 1715 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1716 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1717 ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1718 PetscFunctionReturn(0); 1719 } 1720 1721 #undef __FUNCT__ 1722 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1723 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1724 { 1725 Mat C; 1726 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1727 PetscErrorCode ierr; 1728 PetscInt i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1729 1730 PetscFunctionBegin; 1731 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1732 1733 *B = 0; 1734 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1735 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1736 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1737 c = (Mat_SeqSBAIJ*)C->data; 1738 1739 C->preallocated = PETSC_TRUE; 1740 C->factor = A->factor; 1741 c->row = 0; 1742 c->icol = 0; 1743 c->saved_values = 0; 1744 c->keepzeroedrows = a->keepzeroedrows; 1745 C->assembled = PETSC_TRUE; 1746 1747 C->M = A->M; 1748 C->N = A->N; 1749 C->bs = A->bs; 1750 c->bs2 = a->bs2; 1751 c->mbs = a->mbs; 1752 c->nbs = a->nbs; 1753 1754 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr); 1755 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr); 1756 for (i=0; i<mbs; i++) { 1757 c->imax[i] = a->imax[i]; 1758 c->ilen[i] = a->ilen[i]; 1759 } 1760 1761 /* allocate the matrix space */ 1762 c->singlemalloc = PETSC_TRUE; 1763 len = (mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)); 1764 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1765 c->j = (PetscInt*)(c->a + nz*bs2); 1766 c->i = c->j + nz; 1767 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1768 if (mbs > 0) { 1769 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1770 if (cpvalues == MAT_COPY_VALUES) { 1771 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1772 } else { 1773 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1774 } 1775 } 1776 1777 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1778 c->sorted = a->sorted; 1779 c->roworiented = a->roworiented; 1780 c->nonew = a->nonew; 1781 1782 if (a->diag) { 1783 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 1784 PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt)); 1785 for (i=0; i<mbs; i++) { 1786 c->diag[i] = a->diag[i]; 1787 } 1788 } else c->diag = 0; 1789 c->nz = a->nz; 1790 c->maxnz = a->maxnz; 1791 c->solve_work = 0; 1792 c->mult_work = 0; 1793 *B = C; 1794 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1795 PetscFunctionReturn(0); 1796 } 1797 1798 #undef __FUNCT__ 1799 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1800 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer,const MatType type,Mat *A) 1801 { 1802 Mat_SeqSBAIJ *a; 1803 Mat B; 1804 PetscErrorCode ierr; 1805 int fd; 1806 PetscMPIInt size; 1807 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 1808 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1809 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 1810 PetscInt *masked,nmask,tmp,bs2,ishift; 1811 PetscScalar *aa; 1812 MPI_Comm comm = ((PetscObject)viewer)->comm; 1813 1814 PetscFunctionBegin; 1815 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1816 bs2 = bs*bs; 1817 1818 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1819 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1820 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1821 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1822 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1823 M = header[1]; N = header[2]; nz = header[3]; 1824 1825 if (header[3] < 0) { 1826 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1827 } 1828 1829 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1830 1831 /* 1832 This code adds extra rows to make sure the number of rows is 1833 divisible by the blocksize 1834 */ 1835 mbs = M/bs; 1836 extra_rows = bs - M + bs*(mbs); 1837 if (extra_rows == bs) extra_rows = 0; 1838 else mbs++; 1839 if (extra_rows) { 1840 PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"); 1841 } 1842 1843 /* read in row lengths */ 1844 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1845 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1846 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1847 1848 /* read in column indices */ 1849 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1850 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1851 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1852 1853 /* loop over row lengths determining block row lengths */ 1854 ierr = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr); 1855 ierr = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1856 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 1857 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1858 masked = mask + mbs; 1859 rowcount = 0; nzcount = 0; 1860 for (i=0; i<mbs; i++) { 1861 nmask = 0; 1862 for (j=0; j<bs; j++) { 1863 kmax = rowlengths[rowcount]; 1864 for (k=0; k<kmax; k++) { 1865 tmp = jj[nzcount++]/bs; /* block col. index */ 1866 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1867 } 1868 rowcount++; 1869 } 1870 s_browlengths[i] += nmask; 1871 1872 /* zero out the mask elements we set */ 1873 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1874 } 1875 1876 /* create our matrix */ 1877 ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr); 1878 ierr = MatSetType(B,type);CHKERRQ(ierr); 1879 ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr); 1880 a = (Mat_SeqSBAIJ*)B->data; 1881 1882 /* set matrix "i" values */ 1883 a->i[0] = 0; 1884 for (i=1; i<= mbs; i++) { 1885 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1886 a->ilen[i-1] = s_browlengths[i-1]; 1887 } 1888 a->nz = a->i[mbs]; 1889 1890 /* read in nonzero values */ 1891 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1892 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1893 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1894 1895 /* set "a" and "j" values into matrix */ 1896 nzcount = 0; jcount = 0; 1897 for (i=0; i<mbs; i++) { 1898 nzcountb = nzcount; 1899 nmask = 0; 1900 for (j=0; j<bs; j++) { 1901 kmax = rowlengths[i*bs+j]; 1902 for (k=0; k<kmax; k++) { 1903 tmp = jj[nzcount++]/bs; /* block col. index */ 1904 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1905 } 1906 } 1907 /* sort the masked values */ 1908 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1909 1910 /* set "j" values into matrix */ 1911 maskcount = 1; 1912 for (j=0; j<nmask; j++) { 1913 a->j[jcount++] = masked[j]; 1914 mask[masked[j]] = maskcount++; 1915 } 1916 1917 /* set "a" values into matrix */ 1918 ishift = bs2*a->i[i]; 1919 for (j=0; j<bs; j++) { 1920 kmax = rowlengths[i*bs+j]; 1921 for (k=0; k<kmax; k++) { 1922 tmp = jj[nzcountb]/bs ; /* block col. index */ 1923 if (tmp >= i){ 1924 block = mask[tmp] - 1; 1925 point = jj[nzcountb] - bs*tmp; 1926 idx = ishift + bs2*block + j + bs*point; 1927 a->a[idx] = aa[nzcountb]; 1928 } 1929 nzcountb++; 1930 } 1931 } 1932 /* zero out the mask elements we set */ 1933 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1934 } 1935 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1936 1937 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1938 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1939 ierr = PetscFree(aa);CHKERRQ(ierr); 1940 ierr = PetscFree(jj);CHKERRQ(ierr); 1941 ierr = PetscFree(mask);CHKERRQ(ierr); 1942 1943 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1944 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1945 ierr = MatView_Private(B);CHKERRQ(ierr); 1946 *A = B; 1947 PetscFunctionReturn(0); 1948 } 1949 1950 #undef __FUNCT__ 1951 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1952 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1953 { 1954 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1955 MatScalar *aa=a->a,*v,*v1; 1956 PetscScalar *x,*b,*t,sum,d; 1957 PetscErrorCode ierr; 1958 PetscInt m=a->mbs,bs=A->bs,*ai=a->i,*aj=a->j; 1959 PetscInt nz,nz1,*vj,*vj1,i; 1960 1961 PetscFunctionBegin; 1962 its = its*lits; 1963 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1964 1965 if (bs > 1) 1966 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1967 1968 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1969 if (xx != bb) { 1970 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1971 } else { 1972 b = x; 1973 } 1974 1975 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1976 1977 if (flag & SOR_ZERO_INITIAL_GUESS) { 1978 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1979 for (i=0; i<m; i++) 1980 t[i] = b[i]; 1981 1982 for (i=0; i<m; i++){ 1983 d = *(aa + ai[i]); /* diag[i] */ 1984 v = aa + ai[i] + 1; 1985 vj = aj + ai[i] + 1; 1986 nz = ai[i+1] - ai[i] - 1; 1987 x[i] = omega*t[i]/d; 1988 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 1989 PetscLogFlops(2*nz-1); 1990 } 1991 } 1992 1993 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1994 for (i=0; i<m; i++) 1995 t[i] = b[i]; 1996 1997 for (i=0; i<m-1; i++){ /* update rhs */ 1998 v = aa + ai[i] + 1; 1999 vj = aj + ai[i] + 1; 2000 nz = ai[i+1] - ai[i] - 1; 2001 while (nz--) t[*vj++] -= x[i]*(*v++); 2002 PetscLogFlops(2*nz-1); 2003 } 2004 for (i=m-1; i>=0; i--){ 2005 d = *(aa + ai[i]); 2006 v = aa + ai[i] + 1; 2007 vj = aj + ai[i] + 1; 2008 nz = ai[i+1] - ai[i] - 1; 2009 sum = t[i]; 2010 while (nz--) sum -= x[*vj++]*(*v++); 2011 PetscLogFlops(2*nz-1); 2012 x[i] = (1-omega)*x[i] + omega*sum/d; 2013 } 2014 } 2015 its--; 2016 } 2017 2018 while (its--) { 2019 /* 2020 forward sweep: 2021 for i=0,...,m-1: 2022 sum[i] = (b[i] - U(i,:)x )/d[i]; 2023 x[i] = (1-omega)x[i] + omega*sum[i]; 2024 b = b - x[i]*U^T(i,:); 2025 2026 */ 2027 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2028 for (i=0; i<m; i++) 2029 t[i] = b[i]; 2030 2031 for (i=0; i<m; i++){ 2032 d = *(aa + ai[i]); /* diag[i] */ 2033 v = aa + ai[i] + 1; v1=v; 2034 vj = aj + ai[i] + 1; vj1=vj; 2035 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2036 sum = t[i]; 2037 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2038 x[i] = (1-omega)*x[i] + omega*sum/d; 2039 while (nz--) t[*vj++] -= x[i]*(*v++); 2040 PetscLogFlops(4*nz-2); 2041 } 2042 } 2043 2044 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2045 /* 2046 backward sweep: 2047 b = b - x[i]*U^T(i,:), i=0,...,n-2 2048 for i=m-1,...,0: 2049 sum[i] = (b[i] - U(i,:)x )/d[i]; 2050 x[i] = (1-omega)x[i] + omega*sum[i]; 2051 */ 2052 for (i=0; i<m; i++) 2053 t[i] = b[i]; 2054 2055 for (i=0; i<m-1; i++){ /* update rhs */ 2056 v = aa + ai[i] + 1; 2057 vj = aj + ai[i] + 1; 2058 nz = ai[i+1] - ai[i] - 1; 2059 while (nz--) t[*vj++] -= x[i]*(*v++); 2060 PetscLogFlops(2*nz-1); 2061 } 2062 for (i=m-1; i>=0; i--){ 2063 d = *(aa + ai[i]); 2064 v = aa + ai[i] + 1; 2065 vj = aj + ai[i] + 1; 2066 nz = ai[i+1] - ai[i] - 1; 2067 sum = t[i]; 2068 while (nz--) sum -= x[*vj++]*(*v++); 2069 PetscLogFlops(2*nz-1); 2070 x[i] = (1-omega)*x[i] + omega*sum/d; 2071 } 2072 } 2073 } 2074 2075 ierr = PetscFree(t);CHKERRQ(ierr); 2076 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2077 if (bb != xx) { 2078 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2079 } 2080 2081 PetscFunctionReturn(0); 2082 } 2083 2084 2085 2086 2087 2088 2089