1 2 /* 3 Defines the basic matrix operations for the SBAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 7 #include "src/inline/spops.h" 8 #include "src/mat/impls/sbaij/seq/sbaij.h" 9 10 #define CHUNKSIZE 10 11 12 /* 13 Checks for missing diagonals 14 */ 15 #undef __FUNCT__ 16 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 17 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A) 18 { 19 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 20 PetscErrorCode ierr; 21 PetscInt *diag,*jj = a->j,i; 22 23 PetscFunctionBegin; 24 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 25 diag = a->diag; 26 for (i=0; i<a->mbs; i++) { 27 if (jj[diag[i]] != i) SETERRQ1(PETSC_ERR_ARG_CORRUPT,"Matrix is missing diagonal number %D",i); 28 } 29 PetscFunctionReturn(0); 30 } 31 32 #undef __FUNCT__ 33 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 34 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A) 35 { 36 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 37 PetscErrorCode ierr; 38 PetscInt i,mbs = a->mbs; 39 40 PetscFunctionBegin; 41 if (a->diag) PetscFunctionReturn(0); 42 43 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 44 PetscLogObjectMemory(A,(mbs+1)*sizeof(PetscInt)); 45 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 46 PetscFunctionReturn(0); 47 } 48 49 #undef __FUNCT__ 50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 51 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 52 { 53 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 54 PetscInt n = a->mbs,i; 55 56 PetscFunctionBegin; 57 *nn = n; 58 if (!ia) PetscFunctionReturn(0); 59 60 if (oshift == 1) { 61 /* temporarily add 1 to i and j indices */ 62 PetscInt nz = a->i[n]; 63 for (i=0; i<nz; i++) a->j[i]++; 64 for (i=0; i<n+1; i++) a->i[i]++; 65 *ia = a->i; *ja = a->j; 66 } else { 67 *ia = a->i; *ja = a->j; 68 } 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 74 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 75 { 76 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 77 PetscInt i,n = a->mbs; 78 79 PetscFunctionBegin; 80 if (!ia) PetscFunctionReturn(0); 81 82 if (oshift == 1) { 83 PetscInt nz = a->i[n]-1; 84 for (i=0; i<nz; i++) a->j[i]--; 85 for (i=0; i<n+1; i++) a->i[i]--; 86 } 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 92 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A) 93 { 94 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 95 PetscErrorCode ierr; 96 97 PetscFunctionBegin; 98 PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->m,a->nz); 99 ierr = PetscFree(a->a);CHKERRQ(ierr); 100 if (!a->singlemalloc) { 101 ierr = PetscFree(a->i);CHKERRQ(ierr); 102 ierr = PetscFree(a->j);CHKERRQ(ierr); 103 } 104 if (a->row) { 105 ierr = ISDestroy(a->row);CHKERRQ(ierr); 106 } 107 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 108 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 109 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 110 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 111 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 112 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 113 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 114 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 115 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 116 117 if (a->inew){ 118 ierr = PetscFree(a->inew);CHKERRQ(ierr); 119 a->inew = 0; 120 } 121 ierr = PetscFree(a);CHKERRQ(ierr); 122 123 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 124 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 125 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 126 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 127 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr); 128 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 129 PetscFunctionReturn(0); 130 } 131 132 #undef __FUNCT__ 133 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 134 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op) 135 { 136 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 137 138 PetscFunctionBegin; 139 switch (op) { 140 case MAT_ROW_ORIENTED: 141 a->roworiented = PETSC_TRUE; 142 break; 143 case MAT_COLUMN_ORIENTED: 144 a->roworiented = PETSC_FALSE; 145 break; 146 case MAT_COLUMNS_SORTED: 147 a->sorted = PETSC_TRUE; 148 break; 149 case MAT_COLUMNS_UNSORTED: 150 a->sorted = PETSC_FALSE; 151 break; 152 case MAT_KEEP_ZEROED_ROWS: 153 a->keepzeroedrows = PETSC_TRUE; 154 break; 155 case MAT_NO_NEW_NONZERO_LOCATIONS: 156 a->nonew = 1; 157 break; 158 case MAT_NEW_NONZERO_LOCATION_ERR: 159 a->nonew = -1; 160 break; 161 case MAT_NEW_NONZERO_ALLOCATION_ERR: 162 a->nonew = -2; 163 break; 164 case MAT_YES_NEW_NONZERO_LOCATIONS: 165 a->nonew = 0; 166 break; 167 case MAT_ROWS_SORTED: 168 case MAT_ROWS_UNSORTED: 169 case MAT_YES_NEW_DIAGONALS: 170 case MAT_IGNORE_OFF_PROC_ENTRIES: 171 case MAT_USE_HASH_TABLE: 172 PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n"); 173 break; 174 case MAT_NO_NEW_DIAGONALS: 175 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 176 case MAT_NOT_SYMMETRIC: 177 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 178 case MAT_HERMITIAN: 179 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 180 case MAT_SYMMETRIC: 181 case MAT_STRUCTURALLY_SYMMETRIC: 182 case MAT_NOT_HERMITIAN: 183 case MAT_SYMMETRY_ETERNAL: 184 case MAT_NOT_SYMMETRY_ETERNAL: 185 break; 186 default: 187 SETERRQ(PETSC_ERR_SUP,"unknown option"); 188 } 189 PetscFunctionReturn(0); 190 } 191 192 #undef __FUNCT__ 193 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 194 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v) 195 { 196 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 197 PetscErrorCode ierr; 198 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2; 199 MatScalar *aa,*aa_i; 200 PetscScalar *v_i; 201 202 PetscFunctionBegin; 203 bs = A->bs; 204 ai = a->i; 205 aj = a->j; 206 aa = a->a; 207 bs2 = a->bs2; 208 209 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row); 210 211 bn = row/bs; /* Block number */ 212 bp = row % bs; /* Block position */ 213 M = ai[bn+1] - ai[bn]; 214 *ncols = bs*M; 215 216 if (v) { 217 *v = 0; 218 if (*ncols) { 219 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 220 for (i=0; i<M; i++) { /* for each block in the block row */ 221 v_i = *v + i*bs; 222 aa_i = aa + bs2*(ai[bn] + i); 223 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 224 } 225 } 226 } 227 228 if (cols) { 229 *cols = 0; 230 if (*ncols) { 231 ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr); 232 for (i=0; i<M; i++) { /* for each block in the block row */ 233 cols_i = *cols + i*bs; 234 itmp = bs*aj[ai[bn] + i]; 235 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 236 } 237 } 238 } 239 240 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 241 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 242 #ifdef column_search 243 v_i = *v + M*bs; 244 cols_i = *cols + M*bs; 245 for (i=0; i<bn; i++){ /* for each block row */ 246 M = ai[i+1] - ai[i]; 247 for (j=0; j<M; j++){ 248 itmp = aj[ai[i] + j]; /* block column value */ 249 if (itmp == bn){ 250 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 251 for (k=0; k<bs; k++) { 252 *cols_i++ = i*bs+k; 253 *v_i++ = aa_i[k]; 254 } 255 *ncols += bs; 256 break; 257 } 258 } 259 } 260 #endif 261 262 PetscFunctionReturn(0); 263 } 264 265 #undef __FUNCT__ 266 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 267 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 268 { 269 PetscErrorCode ierr; 270 271 PetscFunctionBegin; 272 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 273 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 274 PetscFunctionReturn(0); 275 } 276 277 #undef __FUNCT__ 278 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 279 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B) 280 { 281 PetscErrorCode ierr; 282 PetscFunctionBegin; 283 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 284 PetscFunctionReturn(0); 285 } 286 287 #undef __FUNCT__ 288 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 289 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 290 { 291 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 292 PetscErrorCode ierr; 293 PetscInt i,j,bs = A->bs,k,l,bs2=a->bs2; 294 char *name; 295 PetscViewerFormat format; 296 297 PetscFunctionBegin; 298 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 299 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 300 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 301 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 302 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 303 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 304 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 305 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 306 for (i=0; i<a->mbs; i++) { 307 for (j=0; j<bs; j++) { 308 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 309 for (k=a->i[i]; k<a->i[i+1]; k++) { 310 for (l=0; l<bs; l++) { 311 #if defined(PETSC_USE_COMPLEX) 312 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 313 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 314 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 315 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 316 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 317 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 318 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 319 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 320 } 321 #else 322 if (a->a[bs2*k + l*bs + j] != 0.0) { 323 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 324 } 325 #endif 326 } 327 } 328 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 329 } 330 } 331 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 332 } else { 333 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 334 for (i=0; i<a->mbs; i++) { 335 for (j=0; j<bs; j++) { 336 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 337 for (k=a->i[i]; k<a->i[i+1]; k++) { 338 for (l=0; l<bs; l++) { 339 #if defined(PETSC_USE_COMPLEX) 340 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 341 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 342 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 343 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 344 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 345 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 346 } else { 347 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 348 } 349 #else 350 ierr = PetscViewerASCIIPrintf(viewer," %D %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 351 #endif 352 } 353 } 354 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 355 } 356 } 357 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 358 } 359 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 360 PetscFunctionReturn(0); 361 } 362 363 #undef __FUNCT__ 364 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 365 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 366 { 367 Mat A = (Mat) Aa; 368 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 369 PetscErrorCode ierr; 370 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2; 371 PetscMPIInt rank; 372 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 373 MatScalar *aa; 374 MPI_Comm comm; 375 PetscViewer viewer; 376 377 PetscFunctionBegin; 378 /* 379 This is nasty. If this is called from an originally parallel matrix 380 then all processes call this,but only the first has the matrix so the 381 rest should return immediately. 382 */ 383 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 384 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 385 if (rank) PetscFunctionReturn(0); 386 387 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 388 389 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 390 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 391 392 /* loop over matrix elements drawing boxes */ 393 color = PETSC_DRAW_BLUE; 394 for (i=0,row=0; i<mbs; i++,row+=bs) { 395 for (j=a->i[i]; j<a->i[i+1]; j++) { 396 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 397 x_l = a->j[j]*bs; x_r = x_l + 1.0; 398 aa = a->a + j*bs2; 399 for (k=0; k<bs; k++) { 400 for (l=0; l<bs; l++) { 401 if (PetscRealPart(*aa++) >= 0.) continue; 402 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 403 } 404 } 405 } 406 } 407 color = PETSC_DRAW_CYAN; 408 for (i=0,row=0; i<mbs; i++,row+=bs) { 409 for (j=a->i[i]; j<a->i[i+1]; j++) { 410 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 411 x_l = a->j[j]*bs; x_r = x_l + 1.0; 412 aa = a->a + j*bs2; 413 for (k=0; k<bs; k++) { 414 for (l=0; l<bs; l++) { 415 if (PetscRealPart(*aa++) != 0.) continue; 416 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 417 } 418 } 419 } 420 } 421 422 color = PETSC_DRAW_RED; 423 for (i=0,row=0; i<mbs; i++,row+=bs) { 424 for (j=a->i[i]; j<a->i[i+1]; j++) { 425 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 426 x_l = a->j[j]*bs; x_r = x_l + 1.0; 427 aa = a->a + j*bs2; 428 for (k=0; k<bs; k++) { 429 for (l=0; l<bs; l++) { 430 if (PetscRealPart(*aa++) <= 0.) continue; 431 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 432 } 433 } 434 } 435 } 436 PetscFunctionReturn(0); 437 } 438 439 #undef __FUNCT__ 440 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 441 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 442 { 443 PetscErrorCode ierr; 444 PetscReal xl,yl,xr,yr,w,h; 445 PetscDraw draw; 446 PetscTruth isnull; 447 448 PetscFunctionBegin; 449 450 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 451 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 452 453 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 454 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 455 xr += w; yr += h; xl = -w; yl = -h; 456 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 457 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 458 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 459 PetscFunctionReturn(0); 460 } 461 462 #undef __FUNCT__ 463 #define __FUNCT__ "MatView_SeqSBAIJ" 464 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 465 { 466 PetscErrorCode ierr; 467 PetscTruth iascii,isdraw; 468 469 PetscFunctionBegin; 470 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 471 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 472 if (iascii){ 473 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 474 } else if (isdraw) { 475 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 476 } else { 477 Mat B; 478 ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr); 479 ierr = MatView(B,viewer);CHKERRQ(ierr); 480 ierr = MatDestroy(B);CHKERRQ(ierr); 481 } 482 PetscFunctionReturn(0); 483 } 484 485 486 #undef __FUNCT__ 487 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 488 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 489 { 490 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 491 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 492 PetscInt *ai = a->i,*ailen = a->ilen; 493 PetscInt brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2; 494 MatScalar *ap,*aa = a->a,zero = 0.0; 495 496 PetscFunctionBegin; 497 for (k=0; k<m; k++) { /* loop over rows */ 498 row = im[k]; brow = row/bs; 499 if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row); 500 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 501 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 502 nrow = ailen[brow]; 503 for (l=0; l<n; l++) { /* loop over columns */ 504 if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]); 505 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1); 506 col = in[l] ; 507 bcol = col/bs; 508 cidx = col%bs; 509 ridx = row%bs; 510 high = nrow; 511 low = 0; /* assume unsorted */ 512 while (high-low > 5) { 513 t = (low+high)/2; 514 if (rp[t] > bcol) high = t; 515 else low = t; 516 } 517 for (i=low; i<high; i++) { 518 if (rp[i] > bcol) break; 519 if (rp[i] == bcol) { 520 *v++ = ap[bs2*i+bs*cidx+ridx]; 521 goto finished; 522 } 523 } 524 *v++ = zero; 525 finished:; 526 } 527 } 528 PetscFunctionReturn(0); 529 } 530 531 532 #undef __FUNCT__ 533 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 534 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 535 { 536 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 537 PetscErrorCode ierr; 538 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 539 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 540 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval; 541 PetscTruth roworiented=a->roworiented; 542 const MatScalar *value = v; 543 MatScalar *ap,*aa = a->a,*bap; 544 545 PetscFunctionBegin; 546 if (roworiented) { 547 stepval = (n-1)*bs; 548 } else { 549 stepval = (m-1)*bs; 550 } 551 for (k=0; k<m; k++) { /* loop over added rows */ 552 row = im[k]; 553 if (row < 0) continue; 554 #if defined(PETSC_USE_DEBUG) 555 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 556 #endif 557 rp = aj + ai[row]; 558 ap = aa + bs2*ai[row]; 559 rmax = imax[row]; 560 nrow = ailen[row]; 561 low = 0; 562 for (l=0; l<n; l++) { /* loop over added columns */ 563 if (in[l] < 0) continue; 564 col = in[l]; 565 #if defined(PETSC_USE_DEBUG) 566 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1); 567 #endif 568 if (col < row) continue; /* ignore lower triangular block */ 569 if (roworiented) { 570 value = v + k*(stepval+bs)*bs + l*bs; 571 } else { 572 value = v + l*(stepval+bs)*bs + k*bs; 573 } 574 if (!sorted) low = 0; high = nrow; 575 while (high-low > 7) { 576 t = (low+high)/2; 577 if (rp[t] > col) high = t; 578 else low = t; 579 } 580 for (i=low; i<high; i++) { 581 if (rp[i] > col) break; 582 if (rp[i] == col) { 583 bap = ap + bs2*i; 584 if (roworiented) { 585 if (is == ADD_VALUES) { 586 for (ii=0; ii<bs; ii++,value+=stepval) { 587 for (jj=ii; jj<bs2; jj+=bs) { 588 bap[jj] += *value++; 589 } 590 } 591 } else { 592 for (ii=0; ii<bs; ii++,value+=stepval) { 593 for (jj=ii; jj<bs2; jj+=bs) { 594 bap[jj] = *value++; 595 } 596 } 597 } 598 } else { 599 if (is == ADD_VALUES) { 600 for (ii=0; ii<bs; ii++,value+=stepval) { 601 for (jj=0; jj<bs; jj++) { 602 *bap++ += *value++; 603 } 604 } 605 } else { 606 for (ii=0; ii<bs; ii++,value+=stepval) { 607 for (jj=0; jj<bs; jj++) { 608 *bap++ = *value++; 609 } 610 } 611 } 612 } 613 goto noinsert2; 614 } 615 } 616 if (nonew == 1) goto noinsert2; 617 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 618 if (nrow >= rmax) { 619 /* there is no extra room in row, therefore enlarge */ 620 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 621 MatScalar *new_a; 622 623 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 624 625 /* malloc new storage space */ 626 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); 627 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 628 new_j = (PetscInt*)(new_a + bs2*new_nz); 629 new_i = new_j + new_nz; 630 631 /* copy over old data into new slots */ 632 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 633 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 634 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 635 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 636 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 637 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 638 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 639 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 640 /* free up old matrix storage */ 641 ierr = PetscFree(a->a);CHKERRQ(ierr); 642 if (!a->singlemalloc) { 643 ierr = PetscFree(a->i);CHKERRQ(ierr); 644 ierr = PetscFree(a->j);CHKERRQ(ierr); 645 } 646 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 647 a->singlemalloc = PETSC_TRUE; 648 649 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 650 rmax = imax[row] = imax[row] + CHUNKSIZE; 651 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); 652 a->maxnz += bs2*CHUNKSIZE; 653 a->reallocs++; 654 a->nz++; 655 } 656 N = nrow++ - 1; 657 /* shift up all the later entries in this row */ 658 for (ii=N; ii>=i; ii--) { 659 rp[ii+1] = rp[ii]; 660 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 661 } 662 if (N >= i) { 663 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 664 } 665 rp[i] = col; 666 bap = ap + bs2*i; 667 if (roworiented) { 668 for (ii=0; ii<bs; ii++,value+=stepval) { 669 for (jj=ii; jj<bs2; jj+=bs) { 670 bap[jj] = *value++; 671 } 672 } 673 } else { 674 for (ii=0; ii<bs; ii++,value+=stepval) { 675 for (jj=0; jj<bs; jj++) { 676 *bap++ = *value++; 677 } 678 } 679 } 680 noinsert2:; 681 low = i; 682 } 683 ailen[row] = nrow; 684 } 685 PetscFunctionReturn(0); 686 } 687 688 #undef __FUNCT__ 689 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 690 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 691 { 692 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 693 PetscErrorCode ierr; 694 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 695 PetscInt m = A->m,*ip,N,*ailen = a->ilen; 696 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 697 MatScalar *aa = a->a,*ap; 698 699 PetscFunctionBegin; 700 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 701 702 if (m) rmax = ailen[0]; 703 for (i=1; i<mbs; i++) { 704 /* move each row back by the amount of empty slots (fshift) before it*/ 705 fshift += imax[i-1] - ailen[i-1]; 706 rmax = PetscMax(rmax,ailen[i]); 707 if (fshift) { 708 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 709 N = ailen[i]; 710 for (j=0; j<N; j++) { 711 ip[j-fshift] = ip[j]; 712 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 713 } 714 } 715 ai[i] = ai[i-1] + ailen[i-1]; 716 } 717 if (mbs) { 718 fshift += imax[mbs-1] - ailen[mbs-1]; 719 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 720 } 721 /* reset ilen and imax for each row */ 722 for (i=0; i<mbs; i++) { 723 ailen[i] = imax[i] = ai[i+1] - ai[i]; 724 } 725 a->nz = ai[mbs]; 726 727 /* diagonals may have moved, reset it */ 728 if (a->diag) { 729 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 730 } 731 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n", 732 m,A->m,A->bs,fshift*bs2,a->nz*bs2); 733 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %D\n", 734 a->reallocs); 735 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %D\n",rmax); 736 a->reallocs = 0; 737 A->info.nz_unneeded = (PetscReal)fshift*bs2; 738 739 PetscFunctionReturn(0); 740 } 741 742 /* 743 This function returns an array of flags which indicate the locations of contiguous 744 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 745 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 746 Assume: sizes should be long enough to hold all the values. 747 */ 748 #undef __FUNCT__ 749 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 750 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 751 { 752 PetscInt i,j,k,row; 753 PetscTruth flg; 754 755 PetscFunctionBegin; 756 for (i=0,j=0; i<n; j++) { 757 row = idx[i]; 758 if (row%bs!=0) { /* Not the begining of a block */ 759 sizes[j] = 1; 760 i++; 761 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 762 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 763 i++; 764 } else { /* Begining of the block, so check if the complete block exists */ 765 flg = PETSC_TRUE; 766 for (k=1; k<bs; k++) { 767 if (row+k != idx[i+k]) { /* break in the block */ 768 flg = PETSC_FALSE; 769 break; 770 } 771 } 772 if (flg == PETSC_TRUE) { /* No break in the bs */ 773 sizes[j] = bs; 774 i+= bs; 775 } else { 776 sizes[j] = 1; 777 i++; 778 } 779 } 780 } 781 *bs_max = j; 782 PetscFunctionReturn(0); 783 } 784 785 786 /* Only add/insert a(i,j) with i<=j (blocks). 787 Any a(i,j) with i>j input by user is ingored. 788 */ 789 790 #undef __FUNCT__ 791 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 792 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 793 { 794 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 795 PetscErrorCode ierr; 796 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 797 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 798 PetscInt *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol; 799 PetscInt ridx,cidx,bs2=a->bs2; 800 MatScalar *ap,value,*aa=a->a,*bap; 801 802 PetscFunctionBegin; 803 804 for (k=0; k<m; k++) { /* loop over added rows */ 805 row = im[k]; /* row number */ 806 brow = row/bs; /* block row number */ 807 if (row < 0) continue; 808 #if defined(PETSC_USE_DEBUG) 809 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 810 #endif 811 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 812 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 813 rmax = imax[brow]; /* maximum space allocated for this row */ 814 nrow = ailen[brow]; /* actual length of this row */ 815 low = 0; 816 817 for (l=0; l<n; l++) { /* loop over added columns */ 818 if (in[l] < 0) continue; 819 #if defined(PETSC_USE_DEBUG) 820 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->m-1); 821 #endif 822 col = in[l]; 823 bcol = col/bs; /* block col number */ 824 825 if (brow <= bcol){ 826 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 827 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 828 /* element value a(k,l) */ 829 if (roworiented) { 830 value = v[l + k*n]; 831 } else { 832 value = v[k + l*m]; 833 } 834 835 /* move pointer bap to a(k,l) quickly and add/insert value */ 836 if (!sorted) low = 0; high = nrow; 837 while (high-low > 7) { 838 t = (low+high)/2; 839 if (rp[t] > bcol) high = t; 840 else low = t; 841 } 842 for (i=low; i<high; i++) { 843 /* printf("The loop of i=low.., rp[%D]=%D\n",i,rp[i]); */ 844 if (rp[i] > bcol) break; 845 if (rp[i] == bcol) { 846 bap = ap + bs2*i + bs*cidx + ridx; 847 if (is == ADD_VALUES) *bap += value; 848 else *bap = value; 849 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 850 if (brow == bcol && ridx < cidx){ 851 bap = ap + bs2*i + bs*ridx + cidx; 852 if (is == ADD_VALUES) *bap += value; 853 else *bap = value; 854 } 855 goto noinsert1; 856 } 857 } 858 859 if (nonew == 1) goto noinsert1; 860 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 861 if (nrow >= rmax) { 862 /* there is no extra room in row, therefore enlarge */ 863 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 864 MatScalar *new_a; 865 866 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 867 868 /* Malloc new storage space */ 869 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); 870 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 871 new_j = (PetscInt*)(new_a + bs2*new_nz); 872 new_i = new_j + new_nz; 873 874 /* copy over old data into new slots */ 875 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 876 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 877 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 878 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 879 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 880 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 881 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 882 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 883 /* free up old matrix storage */ 884 ierr = PetscFree(a->a);CHKERRQ(ierr); 885 if (!a->singlemalloc) { 886 ierr = PetscFree(a->i);CHKERRQ(ierr); 887 ierr = PetscFree(a->j);CHKERRQ(ierr); 888 } 889 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 890 a->singlemalloc = PETSC_TRUE; 891 892 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 893 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 894 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); 895 a->maxnz += bs2*CHUNKSIZE; 896 a->reallocs++; 897 a->nz++; 898 } 899 900 N = nrow++ - 1; 901 /* shift up all the later entries in this row */ 902 for (ii=N; ii>=i; ii--) { 903 rp[ii+1] = rp[ii]; 904 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 905 } 906 if (N>=i) { 907 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 908 } 909 rp[i] = bcol; 910 ap[bs2*i + bs*cidx + ridx] = value; 911 noinsert1:; 912 low = i; 913 /* } */ 914 } 915 } /* end of if .. if.. */ 916 } /* end of loop over added columns */ 917 ailen[brow] = nrow; 918 } /* end of loop over added rows */ 919 920 PetscFunctionReturn(0); 921 } 922 923 EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*); 924 EXTERN PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*); 925 EXTERN PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat,PetscInt,IS[],PetscInt); 926 EXTERN PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,PetscInt,MatReuse,Mat*); 927 EXTERN PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat*[]); 928 EXTERN PetscErrorCode MatScale_SeqSBAIJ(const PetscScalar*,Mat); 929 EXTERN PetscErrorCode MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 930 EXTERN PetscErrorCode MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 931 EXTERN PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat,Vec); 932 EXTERN PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 933 EXTERN PetscErrorCode MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 934 EXTERN PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat); 935 EXTERN PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat,Vec); 936 937 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 938 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 939 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 940 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 941 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 942 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 943 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 944 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 945 946 EXTERN PetscErrorCode MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs); 947 948 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 949 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 950 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 951 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 952 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 953 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 954 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 955 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 956 957 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*); 958 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*); 959 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*); 960 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*); 961 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*); 962 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*); 963 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*); 964 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*); 965 EXTERN PetscErrorCode MatGetInertia_SeqSBAIJ(Mat,PetscInt*,PetscInt*,PetscInt*); 966 967 EXTERN PetscErrorCode MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 968 EXTERN PetscErrorCode MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 969 EXTERN PetscErrorCode MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 970 EXTERN PetscErrorCode MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 971 EXTERN PetscErrorCode MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 972 EXTERN PetscErrorCode MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 973 EXTERN PetscErrorCode MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 974 EXTERN PetscErrorCode MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 975 976 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 977 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 978 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 979 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 980 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 981 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 982 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 983 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 984 985 #ifdef HAVE_MatICCFactor 986 /* modified from MatILUFactor_SeqSBAIJ, needs further work! */ 987 #undef __FUNCT__ 988 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 989 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 990 { 991 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 992 Mat outA; 993 PetscErrorCode ierr; 994 PetscTruth row_identity,col_identity; 995 996 PetscFunctionBegin; 997 outA = inA; 998 inA->factor = FACTOR_CHOLESKY; 999 1000 if (!a->diag) { 1001 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 1002 } 1003 /* 1004 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1005 for ILU(0) factorization with natural ordering 1006 */ 1007 switch (a->bs) { 1008 case 1: 1009 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1010 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1011 inA->ops->solves = MatSolves_SeqSBAIJ_1; 1012 PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1013 case 2: 1014 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1015 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1016 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1017 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1018 break; 1019 case 3: 1020 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1021 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1022 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1023 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1024 break; 1025 case 4: 1026 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1027 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1028 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1029 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1030 break; 1031 case 5: 1032 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1033 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1034 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1035 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1036 break; 1037 case 6: 1038 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1039 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1040 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1041 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1042 break; 1043 case 7: 1044 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1045 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1046 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1047 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1048 break; 1049 default: 1050 a->row = row; 1051 a->icol = col; 1052 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1053 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1054 1055 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1056 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1057 PetscLogObjectParent(inA,a->icol); 1058 1059 if (!a->solve_work) { 1060 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1061 PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar)); 1062 } 1063 } 1064 1065 ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr); 1066 1067 PetscFunctionReturn(0); 1068 } 1069 #endif 1070 1071 #undef __FUNCT__ 1072 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1073 PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat A) 1074 { 1075 static PetscTruth called = PETSC_FALSE; 1076 MPI_Comm comm = A->comm; 1077 PetscErrorCode ierr; 1078 1079 PetscFunctionBegin; 1080 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1081 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1082 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1083 PetscFunctionReturn(0); 1084 } 1085 1086 EXTERN_C_BEGIN 1087 #undef __FUNCT__ 1088 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1089 PetscErrorCode MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices) 1090 { 1091 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1092 PetscInt i,nz,n; 1093 1094 PetscFunctionBegin; 1095 nz = baij->maxnz; 1096 n = mat->n; 1097 for (i=0; i<nz; i++) { 1098 baij->j[i] = indices[i]; 1099 } 1100 baij->nz = nz; 1101 for (i=0; i<n; i++) { 1102 baij->ilen[i] = baij->imax[i]; 1103 } 1104 1105 PetscFunctionReturn(0); 1106 } 1107 EXTERN_C_END 1108 1109 #undef __FUNCT__ 1110 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1111 /*@ 1112 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1113 in the matrix. 1114 1115 Input Parameters: 1116 + mat - the SeqSBAIJ matrix 1117 - indices - the column indices 1118 1119 Level: advanced 1120 1121 Notes: 1122 This can be called if you have precomputed the nonzero structure of the 1123 matrix and want to provide it to the matrix object to improve the performance 1124 of the MatSetValues() operation. 1125 1126 You MUST have set the correct numbers of nonzeros per row in the call to 1127 MatCreateSeqSBAIJ(). 1128 1129 MUST be called before any calls to MatSetValues() 1130 1131 .seealso: MatCreateSeqSBAIJ 1132 @*/ 1133 PetscErrorCode MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1134 { 1135 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1136 1137 PetscFunctionBegin; 1138 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1139 PetscValidPointer(indices,2); 1140 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1141 if (f) { 1142 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1143 } else { 1144 SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices"); 1145 } 1146 PetscFunctionReturn(0); 1147 } 1148 1149 #undef __FUNCT__ 1150 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1151 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A) 1152 { 1153 PetscErrorCode ierr; 1154 1155 PetscFunctionBegin; 1156 ierr = MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1157 PetscFunctionReturn(0); 1158 } 1159 1160 #undef __FUNCT__ 1161 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1162 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1163 { 1164 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1165 PetscFunctionBegin; 1166 *array = a->a; 1167 PetscFunctionReturn(0); 1168 } 1169 1170 #undef __FUNCT__ 1171 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1172 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1173 { 1174 PetscFunctionBegin; 1175 PetscFunctionReturn(0); 1176 } 1177 1178 #include "petscblaslapack.h" 1179 #undef __FUNCT__ 1180 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1181 PetscErrorCode MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1182 { 1183 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1184 PetscErrorCode ierr; 1185 PetscInt i,bs=Y->bs,bs2,j; 1186 PetscBLASInt bnz = (PetscBLASInt)x->nz,one = 1; 1187 1188 PetscFunctionBegin; 1189 if (str == SAME_NONZERO_PATTERN) { 1190 BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1191 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1192 if (y->xtoy && y->XtoY != X) { 1193 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1194 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1195 } 1196 if (!y->xtoy) { /* get xtoy */ 1197 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1198 y->XtoY = X; 1199 } 1200 bs2 = bs*bs; 1201 for (i=0; i<x->nz; i++) { 1202 j = 0; 1203 while (j < bs2){ 1204 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1205 j++; 1206 } 1207 } 1208 PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)); 1209 } else { 1210 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1211 } 1212 PetscFunctionReturn(0); 1213 } 1214 1215 #undef __FUNCT__ 1216 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1217 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1218 { 1219 PetscFunctionBegin; 1220 *flg = PETSC_TRUE; 1221 PetscFunctionReturn(0); 1222 } 1223 1224 #undef __FUNCT__ 1225 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1226 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1227 { 1228 PetscFunctionBegin; 1229 *flg = PETSC_TRUE; 1230 PetscFunctionReturn(0); 1231 } 1232 1233 #undef __FUNCT__ 1234 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1235 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg) 1236 { 1237 PetscFunctionBegin; 1238 *flg = PETSC_FALSE; 1239 PetscFunctionReturn(0); 1240 } 1241 1242 /* -------------------------------------------------------------------*/ 1243 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1244 MatGetRow_SeqSBAIJ, 1245 MatRestoreRow_SeqSBAIJ, 1246 MatMult_SeqSBAIJ_N, 1247 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1248 MatMult_SeqSBAIJ_N, 1249 MatMultAdd_SeqSBAIJ_N, 1250 MatSolve_SeqSBAIJ_N, 1251 0, 1252 0, 1253 /*10*/ 0, 1254 0, 1255 MatCholeskyFactor_SeqSBAIJ, 1256 MatRelax_SeqSBAIJ, 1257 MatTranspose_SeqSBAIJ, 1258 /*15*/ MatGetInfo_SeqSBAIJ, 1259 MatEqual_SeqSBAIJ, 1260 MatGetDiagonal_SeqSBAIJ, 1261 MatDiagonalScale_SeqSBAIJ, 1262 MatNorm_SeqSBAIJ, 1263 /*20*/ 0, 1264 MatAssemblyEnd_SeqSBAIJ, 1265 0, 1266 MatSetOption_SeqSBAIJ, 1267 MatZeroEntries_SeqSBAIJ, 1268 /*25*/ 0, 1269 0, 1270 0, 1271 MatCholeskyFactorSymbolic_SeqSBAIJ, 1272 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1273 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1274 0, 1275 MatICCFactorSymbolic_SeqSBAIJ, 1276 MatGetArray_SeqSBAIJ, 1277 MatRestoreArray_SeqSBAIJ, 1278 /*35*/ MatDuplicate_SeqSBAIJ, 1279 0, 1280 0, 1281 0, 1282 0, 1283 /*40*/ MatAXPY_SeqSBAIJ, 1284 MatGetSubMatrices_SeqSBAIJ, 1285 MatIncreaseOverlap_SeqSBAIJ, 1286 MatGetValues_SeqSBAIJ, 1287 0, 1288 /*45*/ MatPrintHelp_SeqSBAIJ, 1289 MatScale_SeqSBAIJ, 1290 0, 1291 0, 1292 0, 1293 /*50*/ 0, 1294 MatGetRowIJ_SeqSBAIJ, 1295 MatRestoreRowIJ_SeqSBAIJ, 1296 0, 1297 0, 1298 /*55*/ 0, 1299 0, 1300 0, 1301 0, 1302 MatSetValuesBlocked_SeqSBAIJ, 1303 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1304 0, 1305 0, 1306 MatGetPetscMaps_Petsc, 1307 0, 1308 /*65*/ 0, 1309 0, 1310 0, 1311 0, 1312 0, 1313 /*70*/ MatGetRowMax_SeqSBAIJ, 1314 0, 1315 0, 1316 0, 1317 0, 1318 /*75*/ 0, 1319 0, 1320 0, 1321 0, 1322 0, 1323 /*80*/ 0, 1324 0, 1325 0, 1326 #if !defined(PETSC_USE_COMPLEX) 1327 MatGetInertia_SeqSBAIJ, 1328 #else 1329 0, 1330 #endif 1331 MatLoad_SeqSBAIJ, 1332 /*85*/ MatIsSymmetric_SeqSBAIJ, 1333 MatIsHermitian_SeqSBAIJ, 1334 MatIsStructurallySymmetric_SeqSBAIJ, 1335 0, 1336 0, 1337 /*90*/ 0, 1338 0, 1339 0, 1340 0, 1341 0, 1342 /*95*/ 0, 1343 0, 1344 0, 1345 0}; 1346 1347 EXTERN_C_BEGIN 1348 #undef __FUNCT__ 1349 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1350 PetscErrorCode MatStoreValues_SeqSBAIJ(Mat mat) 1351 { 1352 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1353 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 1354 PetscErrorCode ierr; 1355 1356 PetscFunctionBegin; 1357 if (aij->nonew != 1) { 1358 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1359 } 1360 1361 /* allocate space for values if not already there */ 1362 if (!aij->saved_values) { 1363 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1364 } 1365 1366 /* copy values over */ 1367 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1368 PetscFunctionReturn(0); 1369 } 1370 EXTERN_C_END 1371 1372 EXTERN_C_BEGIN 1373 #undef __FUNCT__ 1374 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1375 PetscErrorCode MatRetrieveValues_SeqSBAIJ(Mat mat) 1376 { 1377 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1378 PetscErrorCode ierr; 1379 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 1380 1381 PetscFunctionBegin; 1382 if (aij->nonew != 1) { 1383 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1384 } 1385 if (!aij->saved_values) { 1386 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1387 } 1388 1389 /* copy values over */ 1390 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1391 PetscFunctionReturn(0); 1392 } 1393 EXTERN_C_END 1394 1395 EXTERN_C_BEGIN 1396 #undef __FUNCT__ 1397 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1398 PetscErrorCode MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 1399 { 1400 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1401 PetscErrorCode ierr; 1402 PetscInt i,len,mbs,bs2; 1403 PetscTruth flg; 1404 1405 PetscFunctionBegin; 1406 B->preallocated = PETSC_TRUE; 1407 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1408 mbs = B->m/bs; 1409 bs2 = bs*bs; 1410 1411 if (mbs*bs != B->m) { 1412 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1413 } 1414 1415 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1416 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 1417 if (nnz) { 1418 for (i=0; i<mbs; i++) { 1419 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 1420 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs); 1421 } 1422 } 1423 1424 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1425 if (!flg) { 1426 switch (bs) { 1427 case 1: 1428 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1429 B->ops->solve = MatSolve_SeqSBAIJ_1; 1430 B->ops->solves = MatSolves_SeqSBAIJ_1; 1431 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1432 B->ops->mult = MatMult_SeqSBAIJ_1; 1433 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1434 break; 1435 case 2: 1436 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1437 B->ops->solve = MatSolve_SeqSBAIJ_2; 1438 B->ops->solvetranspose = MatSolve_SeqSBAIJ_2; 1439 B->ops->mult = MatMult_SeqSBAIJ_2; 1440 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1441 break; 1442 case 3: 1443 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1444 B->ops->solve = MatSolve_SeqSBAIJ_3; 1445 B->ops->solvetranspose = MatSolve_SeqSBAIJ_3; 1446 B->ops->mult = MatMult_SeqSBAIJ_3; 1447 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1448 break; 1449 case 4: 1450 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1451 B->ops->solve = MatSolve_SeqSBAIJ_4; 1452 B->ops->solvetranspose = MatSolve_SeqSBAIJ_4; 1453 B->ops->mult = MatMult_SeqSBAIJ_4; 1454 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1455 break; 1456 case 5: 1457 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1458 B->ops->solve = MatSolve_SeqSBAIJ_5; 1459 B->ops->solvetranspose = MatSolve_SeqSBAIJ_5; 1460 B->ops->mult = MatMult_SeqSBAIJ_5; 1461 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1462 break; 1463 case 6: 1464 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1465 B->ops->solve = MatSolve_SeqSBAIJ_6; 1466 B->ops->solvetranspose = MatSolve_SeqSBAIJ_6; 1467 B->ops->mult = MatMult_SeqSBAIJ_6; 1468 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1469 break; 1470 case 7: 1471 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1472 B->ops->solve = MatSolve_SeqSBAIJ_7; 1473 B->ops->solvetranspose = MatSolve_SeqSBAIJ_7; 1474 B->ops->mult = MatMult_SeqSBAIJ_7; 1475 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1476 break; 1477 } 1478 } 1479 1480 b->mbs = mbs; 1481 b->nbs = mbs; 1482 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->imax);CHKERRQ(ierr); 1483 if (!nnz) { 1484 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1485 else if (nz <= 0) nz = 1; 1486 for (i=0; i<mbs; i++) { 1487 b->imax[i] = nz; 1488 } 1489 nz = nz*mbs; /* total nz */ 1490 } else { 1491 nz = 0; 1492 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1493 } 1494 /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1495 1496 /* allocate the matrix space */ 1497 len = nz*sizeof(PetscInt) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(PetscInt); 1498 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1499 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1500 b->j = (PetscInt*)(b->a + nz*bs2); 1501 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1502 b->i = b->j + nz; 1503 b->singlemalloc = PETSC_TRUE; 1504 1505 /* pointer to beginning of each row */ 1506 b->i[0] = 0; 1507 for (i=1; i<mbs+1; i++) { 1508 b->i[i] = b->i[i-1] + b->imax[i-1]; 1509 } 1510 1511 /* b->ilen will count nonzeros in each block row so far. */ 1512 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->ilen);CHKERRQ(ierr); 1513 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1514 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1515 1516 B->bs = bs; 1517 b->bs2 = bs2; 1518 b->nz = 0; 1519 b->maxnz = nz*bs2; 1520 1521 b->inew = 0; 1522 b->jnew = 0; 1523 b->anew = 0; 1524 b->a2anew = 0; 1525 b->permute = PETSC_FALSE; 1526 PetscFunctionReturn(0); 1527 } 1528 EXTERN_C_END 1529 1530 EXTERN_C_BEGIN 1531 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqAIJ(Mat,const MatType,Mat*); 1532 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqBAIJ(Mat,const MatType,Mat*); 1533 EXTERN_C_END 1534 1535 /*MC 1536 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1537 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1538 1539 Options Database Keys: 1540 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1541 1542 Level: beginner 1543 1544 .seealso: MatCreateSeqSBAIJ 1545 M*/ 1546 1547 EXTERN_C_BEGIN 1548 #undef __FUNCT__ 1549 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1550 PetscErrorCode MatCreate_SeqSBAIJ(Mat B) 1551 { 1552 Mat_SeqSBAIJ *b; 1553 PetscErrorCode ierr; 1554 PetscMPIInt size; 1555 1556 PetscFunctionBegin; 1557 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1558 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1559 B->m = B->M = PetscMax(B->m,B->M); 1560 B->n = B->N = PetscMax(B->n,B->N); 1561 1562 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1563 B->data = (void*)b; 1564 ierr = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1565 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1566 B->ops->destroy = MatDestroy_SeqSBAIJ; 1567 B->ops->view = MatView_SeqSBAIJ; 1568 B->factor = 0; 1569 B->lupivotthreshold = 1.0; 1570 B->mapping = 0; 1571 b->row = 0; 1572 b->icol = 0; 1573 b->reallocs = 0; 1574 b->saved_values = 0; 1575 1576 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1577 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1578 1579 b->sorted = PETSC_FALSE; 1580 b->roworiented = PETSC_TRUE; 1581 b->nonew = 0; 1582 b->diag = 0; 1583 b->solve_work = 0; 1584 b->mult_work = 0; 1585 B->spptr = 0; 1586 b->keepzeroedrows = PETSC_FALSE; 1587 b->xtoy = 0; 1588 b->XtoY = 0; 1589 1590 b->inew = 0; 1591 b->jnew = 0; 1592 b->anew = 0; 1593 b->a2anew = 0; 1594 b->permute = PETSC_FALSE; 1595 1596 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1597 "MatStoreValues_SeqSBAIJ", 1598 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1599 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1600 "MatRetrieveValues_SeqSBAIJ", 1601 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1602 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1603 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1604 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1605 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C", 1606 "MatConvert_SeqSBAIJ_SeqAIJ", 1607 MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr); 1608 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C", 1609 "MatConvert_SeqSBAIJ_SeqBAIJ", 1610 MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr); 1611 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1612 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1613 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1614 1615 B->symmetric = PETSC_TRUE; 1616 B->structurally_symmetric = PETSC_TRUE; 1617 B->symmetric_set = PETSC_TRUE; 1618 B->structurally_symmetric_set = PETSC_TRUE; 1619 PetscFunctionReturn(0); 1620 } 1621 EXTERN_C_END 1622 1623 #undef __FUNCT__ 1624 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1625 /*@C 1626 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1627 compressed row) format. For good matrix assembly performance the 1628 user should preallocate the matrix storage by setting the parameter nz 1629 (or the array nnz). By setting these parameters accurately, performance 1630 during matrix assembly can be increased by more than a factor of 50. 1631 1632 Collective on Mat 1633 1634 Input Parameters: 1635 + A - the symmetric matrix 1636 . bs - size of block 1637 . nz - number of block nonzeros per block row (same for all rows) 1638 - nnz - array containing the number of block nonzeros in the upper triangular plus 1639 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1640 1641 Options Database Keys: 1642 . -mat_no_unroll - uses code that does not unroll the loops in the 1643 block calculations (much slower) 1644 . -mat_block_size - size of the blocks to use 1645 1646 Level: intermediate 1647 1648 Notes: 1649 Specify the preallocated storage with either nz or nnz (not both). 1650 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1651 allocation. For additional details, see the users manual chapter on 1652 matrices. 1653 1654 If the nnz parameter is given then the nz parameter is ignored 1655 1656 1657 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1658 @*/ 1659 PetscErrorCode MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 1660 { 1661 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 1662 1663 PetscFunctionBegin; 1664 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1665 if (f) { 1666 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1667 } 1668 PetscFunctionReturn(0); 1669 } 1670 1671 #undef __FUNCT__ 1672 #define __FUNCT__ "MatCreateSeqSBAIJ" 1673 /*@C 1674 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1675 compressed row) format. For good matrix assembly performance the 1676 user should preallocate the matrix storage by setting the parameter nz 1677 (or the array nnz). By setting these parameters accurately, performance 1678 during matrix assembly can be increased by more than a factor of 50. 1679 1680 Collective on MPI_Comm 1681 1682 Input Parameters: 1683 + comm - MPI communicator, set to PETSC_COMM_SELF 1684 . bs - size of block 1685 . m - number of rows, or number of columns 1686 . nz - number of block nonzeros per block row (same for all rows) 1687 - nnz - array containing the number of block nonzeros in the upper triangular plus 1688 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1689 1690 Output Parameter: 1691 . A - the symmetric matrix 1692 1693 Options Database Keys: 1694 . -mat_no_unroll - uses code that does not unroll the loops in the 1695 block calculations (much slower) 1696 . -mat_block_size - size of the blocks to use 1697 1698 Level: intermediate 1699 1700 Notes: 1701 1702 Specify the preallocated storage with either nz or nnz (not both). 1703 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1704 allocation. For additional details, see the users manual chapter on 1705 matrices. 1706 1707 If the nnz parameter is given then the nz parameter is ignored 1708 1709 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1710 @*/ 1711 PetscErrorCode MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1712 { 1713 PetscErrorCode ierr; 1714 1715 PetscFunctionBegin; 1716 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1717 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1718 ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1719 PetscFunctionReturn(0); 1720 } 1721 1722 #undef __FUNCT__ 1723 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1724 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1725 { 1726 Mat C; 1727 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1728 PetscErrorCode ierr; 1729 PetscInt i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1730 1731 PetscFunctionBegin; 1732 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1733 1734 *B = 0; 1735 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1736 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1737 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1738 c = (Mat_SeqSBAIJ*)C->data; 1739 1740 C->preallocated = PETSC_TRUE; 1741 C->factor = A->factor; 1742 c->row = 0; 1743 c->icol = 0; 1744 c->saved_values = 0; 1745 c->keepzeroedrows = a->keepzeroedrows; 1746 C->assembled = PETSC_TRUE; 1747 1748 C->M = A->M; 1749 C->N = A->N; 1750 C->bs = A->bs; 1751 c->bs2 = a->bs2; 1752 c->mbs = a->mbs; 1753 c->nbs = a->nbs; 1754 1755 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr); 1756 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr); 1757 for (i=0; i<mbs; i++) { 1758 c->imax[i] = a->imax[i]; 1759 c->ilen[i] = a->ilen[i]; 1760 } 1761 1762 /* allocate the matrix space */ 1763 c->singlemalloc = PETSC_TRUE; 1764 len = (mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)); 1765 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1766 c->j = (PetscInt*)(c->a + nz*bs2); 1767 c->i = c->j + nz; 1768 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1769 if (mbs > 0) { 1770 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1771 if (cpvalues == MAT_COPY_VALUES) { 1772 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1773 } else { 1774 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1775 } 1776 } 1777 1778 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1779 c->sorted = a->sorted; 1780 c->roworiented = a->roworiented; 1781 c->nonew = a->nonew; 1782 1783 if (a->diag) { 1784 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 1785 PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt)); 1786 for (i=0; i<mbs; i++) { 1787 c->diag[i] = a->diag[i]; 1788 } 1789 } else c->diag = 0; 1790 c->nz = a->nz; 1791 c->maxnz = a->maxnz; 1792 c->solve_work = 0; 1793 c->mult_work = 0; 1794 *B = C; 1795 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1796 PetscFunctionReturn(0); 1797 } 1798 1799 #undef __FUNCT__ 1800 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1801 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer,const MatType type,Mat *A) 1802 { 1803 Mat_SeqSBAIJ *a; 1804 Mat B; 1805 PetscErrorCode ierr; 1806 int fd; 1807 PetscMPIInt size; 1808 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 1809 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1810 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 1811 PetscInt *masked,nmask,tmp,bs2,ishift; 1812 PetscScalar *aa; 1813 MPI_Comm comm = ((PetscObject)viewer)->comm; 1814 1815 PetscFunctionBegin; 1816 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1817 bs2 = bs*bs; 1818 1819 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1820 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1821 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1822 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1823 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1824 M = header[1]; N = header[2]; nz = header[3]; 1825 1826 if (header[3] < 0) { 1827 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1828 } 1829 1830 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1831 1832 /* 1833 This code adds extra rows to make sure the number of rows is 1834 divisible by the blocksize 1835 */ 1836 mbs = M/bs; 1837 extra_rows = bs - M + bs*(mbs); 1838 if (extra_rows == bs) extra_rows = 0; 1839 else mbs++; 1840 if (extra_rows) { 1841 PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"); 1842 } 1843 1844 /* read in row lengths */ 1845 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1846 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1847 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1848 1849 /* read in column indices */ 1850 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1851 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1852 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1853 1854 /* loop over row lengths determining block row lengths */ 1855 ierr = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr); 1856 ierr = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1857 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 1858 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1859 masked = mask + mbs; 1860 rowcount = 0; nzcount = 0; 1861 for (i=0; i<mbs; i++) { 1862 nmask = 0; 1863 for (j=0; j<bs; j++) { 1864 kmax = rowlengths[rowcount]; 1865 for (k=0; k<kmax; k++) { 1866 tmp = jj[nzcount++]/bs; /* block col. index */ 1867 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1868 } 1869 rowcount++; 1870 } 1871 s_browlengths[i] += nmask; 1872 1873 /* zero out the mask elements we set */ 1874 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1875 } 1876 1877 /* create our matrix */ 1878 ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr); 1879 ierr = MatSetType(B,type);CHKERRQ(ierr); 1880 ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr); 1881 a = (Mat_SeqSBAIJ*)B->data; 1882 1883 /* set matrix "i" values */ 1884 a->i[0] = 0; 1885 for (i=1; i<= mbs; i++) { 1886 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1887 a->ilen[i-1] = s_browlengths[i-1]; 1888 } 1889 a->nz = a->i[mbs]; 1890 1891 /* read in nonzero values */ 1892 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1893 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1894 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1895 1896 /* set "a" and "j" values into matrix */ 1897 nzcount = 0; jcount = 0; 1898 for (i=0; i<mbs; i++) { 1899 nzcountb = nzcount; 1900 nmask = 0; 1901 for (j=0; j<bs; j++) { 1902 kmax = rowlengths[i*bs+j]; 1903 for (k=0; k<kmax; k++) { 1904 tmp = jj[nzcount++]/bs; /* block col. index */ 1905 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1906 } 1907 } 1908 /* sort the masked values */ 1909 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1910 1911 /* set "j" values into matrix */ 1912 maskcount = 1; 1913 for (j=0; j<nmask; j++) { 1914 a->j[jcount++] = masked[j]; 1915 mask[masked[j]] = maskcount++; 1916 } 1917 1918 /* set "a" values into matrix */ 1919 ishift = bs2*a->i[i]; 1920 for (j=0; j<bs; j++) { 1921 kmax = rowlengths[i*bs+j]; 1922 for (k=0; k<kmax; k++) { 1923 tmp = jj[nzcountb]/bs ; /* block col. index */ 1924 if (tmp >= i){ 1925 block = mask[tmp] - 1; 1926 point = jj[nzcountb] - bs*tmp; 1927 idx = ishift + bs2*block + j + bs*point; 1928 a->a[idx] = aa[nzcountb]; 1929 } 1930 nzcountb++; 1931 } 1932 } 1933 /* zero out the mask elements we set */ 1934 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1935 } 1936 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1937 1938 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1939 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1940 ierr = PetscFree(aa);CHKERRQ(ierr); 1941 ierr = PetscFree(jj);CHKERRQ(ierr); 1942 ierr = PetscFree(mask);CHKERRQ(ierr); 1943 1944 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1945 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1946 ierr = MatView_Private(B);CHKERRQ(ierr); 1947 *A = B; 1948 PetscFunctionReturn(0); 1949 } 1950 1951 #undef __FUNCT__ 1952 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1953 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1954 { 1955 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1956 MatScalar *aa=a->a,*v,*v1; 1957 PetscScalar *x,*b,*t,sum,d; 1958 PetscErrorCode ierr; 1959 PetscInt m=a->mbs,bs=A->bs,*ai=a->i,*aj=a->j; 1960 PetscInt nz,nz1,*vj,*vj1,i; 1961 1962 PetscFunctionBegin; 1963 its = its*lits; 1964 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1965 1966 if (bs > 1) 1967 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1968 1969 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1970 if (xx != bb) { 1971 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1972 } else { 1973 b = x; 1974 } 1975 1976 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1977 1978 if (flag & SOR_ZERO_INITIAL_GUESS) { 1979 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1980 for (i=0; i<m; i++) 1981 t[i] = b[i]; 1982 1983 for (i=0; i<m; i++){ 1984 d = *(aa + ai[i]); /* diag[i] */ 1985 v = aa + ai[i] + 1; 1986 vj = aj + ai[i] + 1; 1987 nz = ai[i+1] - ai[i] - 1; 1988 x[i] = omega*t[i]/d; 1989 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 1990 PetscLogFlops(2*nz-1); 1991 } 1992 } 1993 1994 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1995 for (i=0; i<m; i++) 1996 t[i] = b[i]; 1997 1998 for (i=0; i<m-1; i++){ /* update rhs */ 1999 v = aa + ai[i] + 1; 2000 vj = aj + ai[i] + 1; 2001 nz = ai[i+1] - ai[i] - 1; 2002 while (nz--) t[*vj++] -= x[i]*(*v++); 2003 PetscLogFlops(2*nz-1); 2004 } 2005 for (i=m-1; i>=0; i--){ 2006 d = *(aa + ai[i]); 2007 v = aa + ai[i] + 1; 2008 vj = aj + ai[i] + 1; 2009 nz = ai[i+1] - ai[i] - 1; 2010 sum = t[i]; 2011 while (nz--) sum -= x[*vj++]*(*v++); 2012 PetscLogFlops(2*nz-1); 2013 x[i] = (1-omega)*x[i] + omega*sum/d; 2014 } 2015 } 2016 its--; 2017 } 2018 2019 while (its--) { 2020 /* 2021 forward sweep: 2022 for i=0,...,m-1: 2023 sum[i] = (b[i] - U(i,:)x )/d[i]; 2024 x[i] = (1-omega)x[i] + omega*sum[i]; 2025 b = b - x[i]*U^T(i,:); 2026 2027 */ 2028 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2029 for (i=0; i<m; i++) 2030 t[i] = b[i]; 2031 2032 for (i=0; i<m; i++){ 2033 d = *(aa + ai[i]); /* diag[i] */ 2034 v = aa + ai[i] + 1; v1=v; 2035 vj = aj + ai[i] + 1; vj1=vj; 2036 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2037 sum = t[i]; 2038 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2039 x[i] = (1-omega)*x[i] + omega*sum/d; 2040 while (nz--) t[*vj++] -= x[i]*(*v++); 2041 PetscLogFlops(4*nz-2); 2042 } 2043 } 2044 2045 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2046 /* 2047 backward sweep: 2048 b = b - x[i]*U^T(i,:), i=0,...,n-2 2049 for i=m-1,...,0: 2050 sum[i] = (b[i] - U(i,:)x )/d[i]; 2051 x[i] = (1-omega)x[i] + omega*sum[i]; 2052 */ 2053 for (i=0; i<m; i++) 2054 t[i] = b[i]; 2055 2056 for (i=0; i<m-1; i++){ /* update rhs */ 2057 v = aa + ai[i] + 1; 2058 vj = aj + ai[i] + 1; 2059 nz = ai[i+1] - ai[i] - 1; 2060 while (nz--) t[*vj++] -= x[i]*(*v++); 2061 PetscLogFlops(2*nz-1); 2062 } 2063 for (i=m-1; i>=0; i--){ 2064 d = *(aa + ai[i]); 2065 v = aa + ai[i] + 1; 2066 vj = aj + ai[i] + 1; 2067 nz = ai[i+1] - ai[i] - 1; 2068 sum = t[i]; 2069 while (nz--) sum -= x[*vj++]*(*v++); 2070 PetscLogFlops(2*nz-1); 2071 x[i] = (1-omega)*x[i] + omega*sum/d; 2072 } 2073 } 2074 } 2075 2076 ierr = PetscFree(t);CHKERRQ(ierr); 2077 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2078 if (bb != xx) { 2079 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2080 } 2081 2082 PetscFunctionReturn(0); 2083 } 2084 2085 2086 2087 2088 2089 2090