1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the SBAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/inline/spops.h" 9 #include "src/mat/impls/sbaij/seq/sbaij.h" 10 11 #define CHUNKSIZE 10 12 13 /* 14 Checks for missing diagonals 15 */ 16 #undef __FUNCT__ 17 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 18 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A) 19 { 20 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 21 PetscErrorCode ierr; 22 PetscInt *diag,*jj = a->j,i; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 for (i=0; i<a->mbs; i++) { 28 if (jj[diag[i]] != i) SETERRQ1(PETSC_ERR_ARG_CORRUPT,"Matrix is missing diagonal number %D",i); 29 } 30 PetscFunctionReturn(0); 31 } 32 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 35 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A) 36 { 37 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 38 PetscErrorCode ierr; 39 PetscInt i,mbs = a->mbs; 40 41 PetscFunctionBegin; 42 if (a->diag) PetscFunctionReturn(0); 43 44 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 45 ierr = PetscLogObjectMemory(A,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 46 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 47 PetscFunctionReturn(0); 48 } 49 50 #undef __FUNCT__ 51 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 52 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 53 { 54 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 55 PetscInt n = a->mbs,i; 56 57 PetscFunctionBegin; 58 *nn = n; 59 if (!ia) PetscFunctionReturn(0); 60 61 if (oshift == 1) { 62 /* temporarily add 1 to i and j indices */ 63 PetscInt nz = a->i[n]; 64 for (i=0; i<nz; i++) a->j[i]++; 65 for (i=0; i<n+1; i++) a->i[i]++; 66 *ia = a->i; *ja = a->j; 67 } else { 68 *ia = a->i; *ja = a->j; 69 } 70 PetscFunctionReturn(0); 71 } 72 73 #undef __FUNCT__ 74 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 75 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 76 { 77 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 78 PetscInt i,n = a->mbs; 79 80 PetscFunctionBegin; 81 if (!ia) PetscFunctionReturn(0); 82 83 if (oshift == 1) { 84 PetscInt nz = a->i[n]-1; 85 for (i=0; i<nz; i++) a->j[i]--; 86 for (i=0; i<n+1; i++) a->i[i]--; 87 } 88 PetscFunctionReturn(0); 89 } 90 91 #undef __FUNCT__ 92 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 93 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A) 94 { 95 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 96 PetscErrorCode ierr; 97 98 PetscFunctionBegin; 99 PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->m,a->nz); 100 ierr = MatSeqXAIJFreeAIJ(a->singlemalloc,a->a,a->j,a->i);CHKERRQ(ierr); 101 if (a->row) { 102 ierr = ISDestroy(a->row);CHKERRQ(ierr); 103 } 104 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 105 if (a->imax) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 106 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 107 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 108 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 109 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 110 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 111 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 112 113 if (a->inew){ 114 ierr = PetscFree(a->inew);CHKERRQ(ierr); 115 a->inew = 0; 116 } 117 ierr = PetscFree(a);CHKERRQ(ierr); 118 119 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 120 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 121 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 122 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 123 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr); 124 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 125 PetscFunctionReturn(0); 126 } 127 128 #undef __FUNCT__ 129 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 130 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op) 131 { 132 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 133 PetscErrorCode ierr; 134 135 PetscFunctionBegin; 136 switch (op) { 137 case MAT_ROW_ORIENTED: 138 a->roworiented = PETSC_TRUE; 139 break; 140 case MAT_COLUMN_ORIENTED: 141 a->roworiented = PETSC_FALSE; 142 break; 143 case MAT_COLUMNS_SORTED: 144 a->sorted = PETSC_TRUE; 145 break; 146 case MAT_COLUMNS_UNSORTED: 147 a->sorted = PETSC_FALSE; 148 break; 149 case MAT_KEEP_ZEROED_ROWS: 150 a->keepzeroedrows = PETSC_TRUE; 151 break; 152 case MAT_NO_NEW_NONZERO_LOCATIONS: 153 a->nonew = 1; 154 break; 155 case MAT_NEW_NONZERO_LOCATION_ERR: 156 a->nonew = -1; 157 break; 158 case MAT_NEW_NONZERO_ALLOCATION_ERR: 159 a->nonew = -2; 160 break; 161 case MAT_YES_NEW_NONZERO_LOCATIONS: 162 a->nonew = 0; 163 break; 164 case MAT_ROWS_SORTED: 165 case MAT_ROWS_UNSORTED: 166 case MAT_YES_NEW_DIAGONALS: 167 case MAT_IGNORE_OFF_PROC_ENTRIES: 168 case MAT_USE_HASH_TABLE: 169 ierr = PetscLogInfo((A,"MatSetOption_SeqSBAIJ:Option ignored\n"));CHKERRQ(ierr); 170 break; 171 case MAT_NO_NEW_DIAGONALS: 172 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 173 case MAT_NOT_SYMMETRIC: 174 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 175 case MAT_HERMITIAN: 176 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 177 case MAT_SYMMETRIC: 178 case MAT_STRUCTURALLY_SYMMETRIC: 179 case MAT_NOT_HERMITIAN: 180 case MAT_SYMMETRY_ETERNAL: 181 case MAT_NOT_SYMMETRY_ETERNAL: 182 case MAT_IGNORE_LOWER_TRIANGULAR: 183 a->ignore_ltriangular = PETSC_TRUE; 184 break; 185 case MAT_ERROR_LOWER_TRIANGULAR: 186 a->ignore_ltriangular = PETSC_FALSE; 187 break; 188 default: 189 SETERRQ(PETSC_ERR_SUP,"unknown option"); 190 } 191 PetscFunctionReturn(0); 192 } 193 194 #undef __FUNCT__ 195 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 196 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v) 197 { 198 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 199 PetscErrorCode ierr; 200 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2; 201 MatScalar *aa,*aa_i; 202 PetscScalar *v_i; 203 204 PetscFunctionBegin; 205 bs = A->bs; 206 ai = a->i; 207 aj = a->j; 208 aa = a->a; 209 bs2 = a->bs2; 210 211 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row); 212 213 bn = row/bs; /* Block number */ 214 bp = row % bs; /* Block position */ 215 M = ai[bn+1] - ai[bn]; 216 *ncols = bs*M; 217 218 if (v) { 219 *v = 0; 220 if (*ncols) { 221 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 222 for (i=0; i<M; i++) { /* for each block in the block row */ 223 v_i = *v + i*bs; 224 aa_i = aa + bs2*(ai[bn] + i); 225 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 226 } 227 } 228 } 229 230 if (cols) { 231 *cols = 0; 232 if (*ncols) { 233 ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr); 234 for (i=0; i<M; i++) { /* for each block in the block row */ 235 cols_i = *cols + i*bs; 236 itmp = bs*aj[ai[bn] + i]; 237 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 238 } 239 } 240 } 241 242 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 243 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 244 #ifdef column_search 245 v_i = *v + M*bs; 246 cols_i = *cols + M*bs; 247 for (i=0; i<bn; i++){ /* for each block row */ 248 M = ai[i+1] - ai[i]; 249 for (j=0; j<M; j++){ 250 itmp = aj[ai[i] + j]; /* block column value */ 251 if (itmp == bn){ 252 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 253 for (k=0; k<bs; k++) { 254 *cols_i++ = i*bs+k; 255 *v_i++ = aa_i[k]; 256 } 257 *ncols += bs; 258 break; 259 } 260 } 261 } 262 #endif 263 PetscFunctionReturn(0); 264 } 265 266 #undef __FUNCT__ 267 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 268 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 269 { 270 PetscErrorCode ierr; 271 272 PetscFunctionBegin; 273 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 274 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 275 PetscFunctionReturn(0); 276 } 277 278 #undef __FUNCT__ 279 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 280 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B) 281 { 282 PetscErrorCode ierr; 283 PetscFunctionBegin; 284 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 285 PetscFunctionReturn(0); 286 } 287 288 #undef __FUNCT__ 289 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 290 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 291 { 292 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 293 PetscErrorCode ierr; 294 PetscInt i,j,bs = A->bs,k,l,bs2=a->bs2; 295 char *name; 296 PetscViewerFormat format; 297 298 PetscFunctionBegin; 299 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 300 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 301 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 302 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 303 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 304 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 305 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 306 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 307 for (i=0; i<a->mbs; i++) { 308 for (j=0; j<bs; j++) { 309 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 310 for (k=a->i[i]; k<a->i[i+1]; k++) { 311 for (l=0; l<bs; l++) { 312 #if defined(PETSC_USE_COMPLEX) 313 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 314 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 315 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 316 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 317 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 318 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 319 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 320 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 321 } 322 #else 323 if (a->a[bs2*k + l*bs + j] != 0.0) { 324 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 325 } 326 #endif 327 } 328 } 329 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 330 } 331 } 332 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 333 } else { 334 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 335 for (i=0; i<a->mbs; i++) { 336 for (j=0; j<bs; j++) { 337 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 338 for (k=a->i[i]; k<a->i[i+1]; k++) { 339 for (l=0; l<bs; l++) { 340 #if defined(PETSC_USE_COMPLEX) 341 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 342 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 343 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 344 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 345 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 346 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 347 } else { 348 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 349 } 350 #else 351 ierr = PetscViewerASCIIPrintf(viewer," %D %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 352 #endif 353 } 354 } 355 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 356 } 357 } 358 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 359 } 360 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 361 PetscFunctionReturn(0); 362 } 363 364 #undef __FUNCT__ 365 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 366 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 367 { 368 Mat A = (Mat) Aa; 369 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 370 PetscErrorCode ierr; 371 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2; 372 PetscMPIInt rank; 373 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 374 MatScalar *aa; 375 MPI_Comm comm; 376 PetscViewer viewer; 377 378 PetscFunctionBegin; 379 /* 380 This is nasty. If this is called from an originally parallel matrix 381 then all processes call this,but only the first has the matrix so the 382 rest should return immediately. 383 */ 384 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 385 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 386 if (rank) PetscFunctionReturn(0); 387 388 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 389 390 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 391 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 392 393 /* loop over matrix elements drawing boxes */ 394 color = PETSC_DRAW_BLUE; 395 for (i=0,row=0; i<mbs; i++,row+=bs) { 396 for (j=a->i[i]; j<a->i[i+1]; j++) { 397 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 398 x_l = a->j[j]*bs; x_r = x_l + 1.0; 399 aa = a->a + j*bs2; 400 for (k=0; k<bs; k++) { 401 for (l=0; l<bs; l++) { 402 if (PetscRealPart(*aa++) >= 0.) continue; 403 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 404 } 405 } 406 } 407 } 408 color = PETSC_DRAW_CYAN; 409 for (i=0,row=0; i<mbs; i++,row+=bs) { 410 for (j=a->i[i]; j<a->i[i+1]; j++) { 411 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 412 x_l = a->j[j]*bs; x_r = x_l + 1.0; 413 aa = a->a + j*bs2; 414 for (k=0; k<bs; k++) { 415 for (l=0; l<bs; l++) { 416 if (PetscRealPart(*aa++) != 0.) continue; 417 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 418 } 419 } 420 } 421 } 422 423 color = PETSC_DRAW_RED; 424 for (i=0,row=0; i<mbs; i++,row+=bs) { 425 for (j=a->i[i]; j<a->i[i+1]; j++) { 426 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 427 x_l = a->j[j]*bs; x_r = x_l + 1.0; 428 aa = a->a + j*bs2; 429 for (k=0; k<bs; k++) { 430 for (l=0; l<bs; l++) { 431 if (PetscRealPart(*aa++) <= 0.) continue; 432 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 433 } 434 } 435 } 436 } 437 PetscFunctionReturn(0); 438 } 439 440 #undef __FUNCT__ 441 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 442 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 443 { 444 PetscErrorCode ierr; 445 PetscReal xl,yl,xr,yr,w,h; 446 PetscDraw draw; 447 PetscTruth isnull; 448 449 PetscFunctionBegin; 450 451 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 452 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 453 454 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 455 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 456 xr += w; yr += h; xl = -w; yl = -h; 457 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 458 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 459 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 460 PetscFunctionReturn(0); 461 } 462 463 #undef __FUNCT__ 464 #define __FUNCT__ "MatView_SeqSBAIJ" 465 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 466 { 467 PetscErrorCode ierr; 468 PetscTruth iascii,isdraw; 469 470 PetscFunctionBegin; 471 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 472 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 473 if (iascii){ 474 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 475 } else if (isdraw) { 476 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 477 } else { 478 Mat B; 479 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 480 ierr = MatView(B,viewer);CHKERRQ(ierr); 481 ierr = MatDestroy(B);CHKERRQ(ierr); 482 } 483 PetscFunctionReturn(0); 484 } 485 486 487 #undef __FUNCT__ 488 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 489 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 490 { 491 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 492 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 493 PetscInt *ai = a->i,*ailen = a->ilen; 494 PetscInt brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2; 495 MatScalar *ap,*aa = a->a,zero = 0.0; 496 497 PetscFunctionBegin; 498 for (k=0; k<m; k++) { /* loop over rows */ 499 row = im[k]; brow = row/bs; 500 if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row); 501 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 502 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 503 nrow = ailen[brow]; 504 for (l=0; l<n; l++) { /* loop over columns */ 505 if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]); 506 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1); 507 col = in[l] ; 508 bcol = col/bs; 509 cidx = col%bs; 510 ridx = row%bs; 511 high = nrow; 512 low = 0; /* assume unsorted */ 513 while (high-low > 5) { 514 t = (low+high)/2; 515 if (rp[t] > bcol) high = t; 516 else low = t; 517 } 518 for (i=low; i<high; i++) { 519 if (rp[i] > bcol) break; 520 if (rp[i] == bcol) { 521 *v++ = ap[bs2*i+bs*cidx+ridx]; 522 goto finished; 523 } 524 } 525 *v++ = zero; 526 finished:; 527 } 528 } 529 PetscFunctionReturn(0); 530 } 531 532 533 #undef __FUNCT__ 534 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 535 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 536 { 537 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 538 PetscErrorCode ierr; 539 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 540 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 541 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval; 542 PetscTruth roworiented=a->roworiented; 543 const MatScalar *value = v; 544 MatScalar *ap,*aa = a->a,*bap; 545 546 PetscFunctionBegin; 547 if (roworiented) { 548 stepval = (n-1)*bs; 549 } else { 550 stepval = (m-1)*bs; 551 } 552 for (k=0; k<m; k++) { /* loop over added rows */ 553 row = im[k]; 554 if (row < 0) continue; 555 #if defined(PETSC_USE_DEBUG) 556 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 557 #endif 558 rp = aj + ai[row]; 559 ap = aa + bs2*ai[row]; 560 rmax = imax[row]; 561 nrow = ailen[row]; 562 low = 0; 563 high = nrow; 564 for (l=0; l<n; l++) { /* loop over added columns */ 565 if (in[l] < 0) continue; 566 col = in[l]; 567 #if defined(PETSC_USE_DEBUG) 568 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1); 569 #endif 570 if (col < row) continue; /* ignore lower triangular block */ 571 if (roworiented) { 572 value = v + k*(stepval+bs)*bs + l*bs; 573 } else { 574 value = v + l*(stepval+bs)*bs + k*bs; 575 } 576 if (col < lastcol) low = 0; else high = nrow; 577 lastcol = col; 578 while (high-low > 7) { 579 t = (low+high)/2; 580 if (rp[t] > col) high = t; 581 else low = t; 582 } 583 for (i=low; i<high; i++) { 584 if (rp[i] > col) break; 585 if (rp[i] == col) { 586 bap = ap + bs2*i; 587 if (roworiented) { 588 if (is == ADD_VALUES) { 589 for (ii=0; ii<bs; ii++,value+=stepval) { 590 for (jj=ii; jj<bs2; jj+=bs) { 591 bap[jj] += *value++; 592 } 593 } 594 } else { 595 for (ii=0; ii<bs; ii++,value+=stepval) { 596 for (jj=ii; jj<bs2; jj+=bs) { 597 bap[jj] = *value++; 598 } 599 } 600 } 601 } else { 602 if (is == ADD_VALUES) { 603 for (ii=0; ii<bs; ii++,value+=stepval) { 604 for (jj=0; jj<bs; jj++) { 605 *bap++ += *value++; 606 } 607 } 608 } else { 609 for (ii=0; ii<bs; ii++,value+=stepval) { 610 for (jj=0; jj<bs; jj++) { 611 *bap++ = *value++; 612 } 613 } 614 } 615 } 616 goto noinsert2; 617 } 618 } 619 if (nonew == 1) goto noinsert2; 620 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 621 if (nrow >= rmax) { 622 /* there is no extra room in row, therefore enlarge */ 623 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 624 MatScalar *new_a; 625 626 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 627 628 /* malloc new storage space */ 629 ierr = PetscMalloc3(bs2*new_nz,PetscScalar,&new_a,new_nz,PetscInt,&new_j,a->mbs+1,PetscInt,&new_i);CHKERRQ(ierr); 630 631 /* copy over old data into new slots */ 632 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 633 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 634 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 635 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 636 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 637 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 638 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 639 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 640 /* free up old matrix storage */ 641 ierr = MatSeqXAIJFreeAIJ(a->singlemalloc,a->a,a->j,a->i);CHKERRQ(ierr); 642 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 643 a->singlemalloc = PETSC_TRUE; 644 645 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 646 rmax = imax[row] = imax[row] + CHUNKSIZE; 647 ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); 648 a->maxnz += bs2*CHUNKSIZE; 649 a->reallocs++; 650 a->nz++; 651 } 652 N = nrow++ - 1; 653 /* shift up all the later entries in this row */ 654 for (ii=N; ii>=i; ii--) { 655 rp[ii+1] = rp[ii]; 656 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 657 } 658 if (N >= i) { 659 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 660 } 661 rp[i] = col; 662 bap = ap + bs2*i; 663 if (roworiented) { 664 for (ii=0; ii<bs; ii++,value+=stepval) { 665 for (jj=ii; jj<bs2; jj+=bs) { 666 bap[jj] = *value++; 667 } 668 } 669 } else { 670 for (ii=0; ii<bs; ii++,value+=stepval) { 671 for (jj=0; jj<bs; jj++) { 672 *bap++ = *value++; 673 } 674 } 675 } 676 noinsert2:; 677 low = i; 678 } 679 ailen[row] = nrow; 680 } 681 PetscFunctionReturn(0); 682 } 683 684 #undef __FUNCT__ 685 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 686 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 687 { 688 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 689 PetscErrorCode ierr; 690 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 691 PetscInt m = A->m,*ip,N,*ailen = a->ilen; 692 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 693 MatScalar *aa = a->a,*ap; 694 695 PetscFunctionBegin; 696 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 697 698 if (m) rmax = ailen[0]; 699 for (i=1; i<mbs; i++) { 700 /* move each row back by the amount of empty slots (fshift) before it*/ 701 fshift += imax[i-1] - ailen[i-1]; 702 rmax = PetscMax(rmax,ailen[i]); 703 if (fshift) { 704 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 705 N = ailen[i]; 706 for (j=0; j<N; j++) { 707 ip[j-fshift] = ip[j]; 708 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 709 } 710 } 711 ai[i] = ai[i-1] + ailen[i-1]; 712 } 713 if (mbs) { 714 fshift += imax[mbs-1] - ailen[mbs-1]; 715 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 716 } 717 /* reset ilen and imax for each row */ 718 for (i=0; i<mbs; i++) { 719 ailen[i] = imax[i] = ai[i+1] - ai[i]; 720 } 721 a->nz = ai[mbs]; 722 723 /* diagonals may have moved, reset it */ 724 if (a->diag) { 725 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 726 } 727 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n", 728 m,A->m,A->bs,fshift*bs2,a->nz*bs2));CHKERRQ(ierr); 729 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs));CHKERRQ(ierr); 730 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %D\n",rmax));CHKERRQ(ierr); 731 a->reallocs = 0; 732 A->info.nz_unneeded = (PetscReal)fshift*bs2; 733 PetscFunctionReturn(0); 734 } 735 736 /* 737 This function returns an array of flags which indicate the locations of contiguous 738 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 739 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 740 Assume: sizes should be long enough to hold all the values. 741 */ 742 #undef __FUNCT__ 743 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 744 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 745 { 746 PetscInt i,j,k,row; 747 PetscTruth flg; 748 749 PetscFunctionBegin; 750 for (i=0,j=0; i<n; j++) { 751 row = idx[i]; 752 if (row%bs!=0) { /* Not the begining of a block */ 753 sizes[j] = 1; 754 i++; 755 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 756 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 757 i++; 758 } else { /* Begining of the block, so check if the complete block exists */ 759 flg = PETSC_TRUE; 760 for (k=1; k<bs; k++) { 761 if (row+k != idx[i+k]) { /* break in the block */ 762 flg = PETSC_FALSE; 763 break; 764 } 765 } 766 if (flg) { /* No break in the bs */ 767 sizes[j] = bs; 768 i+= bs; 769 } else { 770 sizes[j] = 1; 771 i++; 772 } 773 } 774 } 775 *bs_max = j; 776 PetscFunctionReturn(0); 777 } 778 779 780 /* Only add/insert a(i,j) with i<=j (blocks). 781 Any a(i,j) with i>j input by user is ingored. 782 */ 783 784 #undef __FUNCT__ 785 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 786 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 787 { 788 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 789 PetscErrorCode ierr; 790 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 791 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 792 PetscInt *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol; 793 PetscInt ridx,cidx,bs2=a->bs2; 794 MatScalar *ap,value,*aa=a->a,*bap; 795 796 PetscFunctionBegin; 797 for (k=0; k<m; k++) { /* loop over added rows */ 798 row = im[k]; /* row number */ 799 brow = row/bs; /* block row number */ 800 if (row < 0) continue; 801 #if defined(PETSC_USE_DEBUG) 802 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 803 #endif 804 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 805 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 806 rmax = imax[brow]; /* maximum space allocated for this row */ 807 nrow = ailen[brow]; /* actual length of this row */ 808 low = 0; 809 810 for (l=0; l<n; l++) { /* loop over added columns */ 811 if (in[l] < 0) continue; 812 #if defined(PETSC_USE_DEBUG) 813 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->m-1); 814 #endif 815 col = in[l]; 816 bcol = col/bs; /* block col number */ 817 818 if (brow > bcol) { 819 if (a->ignore_ltriangular){ 820 continue; /* ignore lower triangular values */ 821 } else { 822 SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR)"); 823 } 824 } 825 826 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 827 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 828 /* element value a(k,l) */ 829 if (roworiented) { 830 value = v[l + k*n]; 831 } else { 832 value = v[k + l*m]; 833 } 834 835 /* move pointer bap to a(k,l) quickly and add/insert value */ 836 if (col < lastcol) low = 0; high = nrow; 837 lastcol = col; 838 while (high-low > 7) { 839 t = (low+high)/2; 840 if (rp[t] > bcol) high = t; 841 else low = t; 842 } 843 for (i=low; i<high; i++) { 844 if (rp[i] > bcol) break; 845 if (rp[i] == bcol) { 846 bap = ap + bs2*i + bs*cidx + ridx; 847 if (is == ADD_VALUES) *bap += value; 848 else *bap = value; 849 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 850 if (brow == bcol && ridx < cidx){ 851 bap = ap + bs2*i + bs*ridx + cidx; 852 if (is == ADD_VALUES) *bap += value; 853 else *bap = value; 854 } 855 goto noinsert1; 856 } 857 } 858 859 if (nonew == 1) goto noinsert1; 860 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 861 if (nrow >= rmax) { 862 /* there is no extra room in row, therefore enlarge */ 863 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 864 MatScalar *new_a; 865 866 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 867 868 /* Malloc new storage space */ 869 ierr = PetscMalloc3(bs2*new_nz,PetscScalar,&new_a,new_nz,PetscInt,&new_j,a->mbs+1,PetscInt,&new_i);CHKERRQ(ierr); 870 871 /* copy over old data into new slots */ 872 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 873 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 874 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 875 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 876 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 877 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 878 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 879 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 880 /* free up old matrix storage */ 881 ierr = MatSeqXAIJFreeAIJ(a->singlemalloc,a->a,a->j,a->i);CHKERRQ(ierr); 882 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 883 a->singlemalloc = PETSC_TRUE; 884 885 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 886 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 887 ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); 888 a->maxnz += bs2*CHUNKSIZE; 889 a->reallocs++; 890 a->nz++; 891 } 892 893 N = nrow++ - 1; 894 /* shift up all the later entries in this row */ 895 for (ii=N; ii>=i; ii--) { 896 rp[ii+1] = rp[ii]; 897 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 898 } 899 if (N>=i) { 900 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 901 } 902 rp[i] = bcol; 903 ap[bs2*i + bs*cidx + ridx] = value; 904 noinsert1:; 905 low = i; 906 } 907 } /* end of loop over added columns */ 908 ailen[brow] = nrow; 909 } /* end of loop over added rows */ 910 PetscFunctionReturn(0); 911 } 912 913 EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*); 914 EXTERN PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*); 915 EXTERN PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat,PetscInt,IS[],PetscInt); 916 EXTERN PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,PetscInt,MatReuse,Mat*); 917 EXTERN PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat*[]); 918 EXTERN PetscErrorCode MatScale_SeqSBAIJ(const PetscScalar*,Mat); 919 EXTERN PetscErrorCode MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 920 EXTERN PetscErrorCode MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 921 EXTERN PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat,Vec); 922 EXTERN PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 923 EXTERN PetscErrorCode MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 924 EXTERN PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat); 925 EXTERN PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat,Vec); 926 927 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 928 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 929 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 930 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 931 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 932 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 933 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 934 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 935 936 EXTERN PetscErrorCode MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs); 937 938 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 939 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 940 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 941 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 942 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 943 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 944 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 945 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 946 947 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,MatFactorInfo*,Mat*); 948 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,MatFactorInfo*,Mat*); 949 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,MatFactorInfo*,Mat*); 950 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,MatFactorInfo*,Mat*); 951 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,MatFactorInfo*,Mat*); 952 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,MatFactorInfo*,Mat*); 953 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,MatFactorInfo*,Mat*); 954 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,MatFactorInfo*,Mat*); 955 EXTERN PetscErrorCode MatGetInertia_SeqSBAIJ(Mat,PetscInt*,PetscInt*,PetscInt*); 956 957 EXTERN PetscErrorCode MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 958 EXTERN PetscErrorCode MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 959 EXTERN PetscErrorCode MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 960 EXTERN PetscErrorCode MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 961 EXTERN PetscErrorCode MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 962 EXTERN PetscErrorCode MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 963 EXTERN PetscErrorCode MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 964 EXTERN PetscErrorCode MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 965 966 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 967 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 968 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 969 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 970 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 971 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 972 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 973 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 974 975 #ifdef HAVE_MatICCFactor 976 /* modified from MatILUFactor_SeqSBAIJ, needs further work! */ 977 #undef __FUNCT__ 978 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 979 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 980 { 981 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 982 Mat outA; 983 PetscErrorCode ierr; 984 PetscTruth row_identity,col_identity; 985 986 PetscFunctionBegin; 987 outA = inA; 988 inA->factor = FACTOR_CHOLESKY; 989 990 if (!a->diag) { 991 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 992 } 993 /* 994 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 995 for ILU(0) factorization with natural ordering 996 */ 997 switch (a->bs) { 998 case 1: 999 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1000 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1001 inA->ops->solves = MatSolves_SeqSBAIJ_1; 1002 ierr = PetscLoginfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"));CHKERRQ(ierr); 1003 case 2: 1004 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1005 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1006 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1007 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"));CHKERRQ(ierr); 1008 break; 1009 case 3: 1010 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1011 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1012 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1013 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"));CHKERRQ(ierr); 1014 break; 1015 case 4: 1016 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1017 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1018 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1019 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"));CHKERRQ(ierr); 1020 break; 1021 case 5: 1022 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1023 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1024 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1025 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"));CHKERRQ(ierr); 1026 break; 1027 case 6: 1028 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1029 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1030 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1031 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"));CHKERRQ(ierr); 1032 break; 1033 case 7: 1034 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1035 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1036 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1037 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"));CHKERRQ(ierr); 1038 break; 1039 default: 1040 a->row = row; 1041 a->icol = col; 1042 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1043 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1044 1045 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1046 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1047 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 1048 1049 if (!a->solve_work) { 1050 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1051 ierr = PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1052 } 1053 } 1054 1055 ierr = MatCholeskyFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1056 PetscFunctionReturn(0); 1057 } 1058 #endif 1059 1060 #undef __FUNCT__ 1061 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1062 PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat A) 1063 { 1064 static PetscTruth called = PETSC_FALSE; 1065 MPI_Comm comm = A->comm; 1066 PetscErrorCode ierr; 1067 1068 PetscFunctionBegin; 1069 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1070 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1071 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1072 ierr = (*PetscHelpPrintf)(comm," -mat_ignore_ltriangular: Ignore lower triangular values set by user\n");CHKERRQ(ierr); 1073 PetscFunctionReturn(0); 1074 } 1075 1076 EXTERN_C_BEGIN 1077 #undef __FUNCT__ 1078 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1079 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices) 1080 { 1081 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1082 PetscInt i,nz,n; 1083 1084 PetscFunctionBegin; 1085 nz = baij->maxnz; 1086 n = mat->n; 1087 for (i=0; i<nz; i++) { 1088 baij->j[i] = indices[i]; 1089 } 1090 baij->nz = nz; 1091 for (i=0; i<n; i++) { 1092 baij->ilen[i] = baij->imax[i]; 1093 } 1094 PetscFunctionReturn(0); 1095 } 1096 EXTERN_C_END 1097 1098 #undef __FUNCT__ 1099 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1100 /*@ 1101 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1102 in the matrix. 1103 1104 Input Parameters: 1105 + mat - the SeqSBAIJ matrix 1106 - indices - the column indices 1107 1108 Level: advanced 1109 1110 Notes: 1111 This can be called if you have precomputed the nonzero structure of the 1112 matrix and want to provide it to the matrix object to improve the performance 1113 of the MatSetValues() operation. 1114 1115 You MUST have set the correct numbers of nonzeros per row in the call to 1116 MatCreateSeqSBAIJ(), and the columns indices MUST be sorted. 1117 1118 MUST be called before any calls to MatSetValues() 1119 1120 .seealso: MatCreateSeqSBAIJ 1121 @*/ 1122 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1123 { 1124 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1125 1126 PetscFunctionBegin; 1127 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1128 PetscValidPointer(indices,2); 1129 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1130 if (f) { 1131 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1132 } else { 1133 SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices"); 1134 } 1135 PetscFunctionReturn(0); 1136 } 1137 1138 #undef __FUNCT__ 1139 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1140 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A) 1141 { 1142 PetscErrorCode ierr; 1143 1144 PetscFunctionBegin; 1145 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1146 PetscFunctionReturn(0); 1147 } 1148 1149 #undef __FUNCT__ 1150 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1151 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1152 { 1153 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1154 PetscFunctionBegin; 1155 *array = a->a; 1156 PetscFunctionReturn(0); 1157 } 1158 1159 #undef __FUNCT__ 1160 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1161 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1162 { 1163 PetscFunctionBegin; 1164 PetscFunctionReturn(0); 1165 } 1166 1167 #include "petscblaslapack.h" 1168 #undef __FUNCT__ 1169 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1170 PetscErrorCode MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1171 { 1172 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1173 PetscErrorCode ierr; 1174 PetscInt i,bs=Y->bs,bs2,j; 1175 PetscBLASInt bnz = (PetscBLASInt)x->nz,one = 1; 1176 1177 PetscFunctionBegin; 1178 if (str == SAME_NONZERO_PATTERN) { 1179 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1180 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1181 if (y->xtoy && y->XtoY != X) { 1182 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1183 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1184 } 1185 if (!y->xtoy) { /* get xtoy */ 1186 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1187 y->XtoY = X; 1188 } 1189 bs2 = bs*bs; 1190 for (i=0; i<x->nz; i++) { 1191 j = 0; 1192 while (j < bs2){ 1193 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1194 j++; 1195 } 1196 } 1197 ierr = PetscLogInfo((0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr); 1198 } else { 1199 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1200 } 1201 PetscFunctionReturn(0); 1202 } 1203 1204 #undef __FUNCT__ 1205 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1206 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1207 { 1208 PetscFunctionBegin; 1209 *flg = PETSC_TRUE; 1210 PetscFunctionReturn(0); 1211 } 1212 1213 #undef __FUNCT__ 1214 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1215 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1216 { 1217 PetscFunctionBegin; 1218 *flg = PETSC_TRUE; 1219 PetscFunctionReturn(0); 1220 } 1221 1222 #undef __FUNCT__ 1223 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1224 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg) 1225 { 1226 PetscFunctionBegin; 1227 *flg = PETSC_FALSE; 1228 PetscFunctionReturn(0); 1229 } 1230 1231 /* -------------------------------------------------------------------*/ 1232 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1233 MatGetRow_SeqSBAIJ, 1234 MatRestoreRow_SeqSBAIJ, 1235 MatMult_SeqSBAIJ_N, 1236 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1237 MatMult_SeqSBAIJ_N, 1238 MatMultAdd_SeqSBAIJ_N, 1239 MatSolve_SeqSBAIJ_N, 1240 0, 1241 0, 1242 /*10*/ 0, 1243 0, 1244 MatCholeskyFactor_SeqSBAIJ, 1245 MatRelax_SeqSBAIJ, 1246 MatTranspose_SeqSBAIJ, 1247 /*15*/ MatGetInfo_SeqSBAIJ, 1248 MatEqual_SeqSBAIJ, 1249 MatGetDiagonal_SeqSBAIJ, 1250 MatDiagonalScale_SeqSBAIJ, 1251 MatNorm_SeqSBAIJ, 1252 /*20*/ 0, 1253 MatAssemblyEnd_SeqSBAIJ, 1254 0, 1255 MatSetOption_SeqSBAIJ, 1256 MatZeroEntries_SeqSBAIJ, 1257 /*25*/ 0, 1258 0, 1259 0, 1260 MatCholeskyFactorSymbolic_SeqSBAIJ, 1261 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1262 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1263 0, 1264 MatICCFactorSymbolic_SeqSBAIJ, 1265 MatGetArray_SeqSBAIJ, 1266 MatRestoreArray_SeqSBAIJ, 1267 /*35*/ MatDuplicate_SeqSBAIJ, 1268 0, 1269 0, 1270 0, 1271 0, 1272 /*40*/ MatAXPY_SeqSBAIJ, 1273 MatGetSubMatrices_SeqSBAIJ, 1274 MatIncreaseOverlap_SeqSBAIJ, 1275 MatGetValues_SeqSBAIJ, 1276 0, 1277 /*45*/ MatPrintHelp_SeqSBAIJ, 1278 MatScale_SeqSBAIJ, 1279 0, 1280 0, 1281 0, 1282 /*50*/ 0, 1283 MatGetRowIJ_SeqSBAIJ, 1284 MatRestoreRowIJ_SeqSBAIJ, 1285 0, 1286 0, 1287 /*55*/ 0, 1288 0, 1289 0, 1290 0, 1291 MatSetValuesBlocked_SeqSBAIJ, 1292 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1293 0, 1294 0, 1295 MatGetPetscMaps_Petsc, 1296 0, 1297 /*65*/ 0, 1298 0, 1299 0, 1300 0, 1301 0, 1302 /*70*/ MatGetRowMax_SeqSBAIJ, 1303 0, 1304 0, 1305 0, 1306 0, 1307 /*75*/ 0, 1308 0, 1309 0, 1310 0, 1311 0, 1312 /*80*/ 0, 1313 0, 1314 0, 1315 #if !defined(PETSC_USE_COMPLEX) 1316 MatGetInertia_SeqSBAIJ, 1317 #else 1318 0, 1319 #endif 1320 MatLoad_SeqSBAIJ, 1321 /*85*/ MatIsSymmetric_SeqSBAIJ, 1322 MatIsHermitian_SeqSBAIJ, 1323 MatIsStructurallySymmetric_SeqSBAIJ, 1324 0, 1325 0, 1326 /*90*/ 0, 1327 0, 1328 0, 1329 0, 1330 0, 1331 /*95*/ 0, 1332 0, 1333 0, 1334 0}; 1335 1336 EXTERN_C_BEGIN 1337 #undef __FUNCT__ 1338 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1339 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqSBAIJ(Mat mat) 1340 { 1341 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1342 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 1343 PetscErrorCode ierr; 1344 1345 PetscFunctionBegin; 1346 if (aij->nonew != 1) { 1347 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1348 } 1349 1350 /* allocate space for values if not already there */ 1351 if (!aij->saved_values) { 1352 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1353 } 1354 1355 /* copy values over */ 1356 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1357 PetscFunctionReturn(0); 1358 } 1359 EXTERN_C_END 1360 1361 EXTERN_C_BEGIN 1362 #undef __FUNCT__ 1363 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1364 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqSBAIJ(Mat mat) 1365 { 1366 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1367 PetscErrorCode ierr; 1368 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 1369 1370 PetscFunctionBegin; 1371 if (aij->nonew != 1) { 1372 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1373 } 1374 if (!aij->saved_values) { 1375 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1376 } 1377 1378 /* copy values over */ 1379 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1380 PetscFunctionReturn(0); 1381 } 1382 EXTERN_C_END 1383 1384 EXTERN_C_BEGIN 1385 #undef __FUNCT__ 1386 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1387 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 1388 { 1389 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1390 PetscErrorCode ierr; 1391 PetscInt i,mbs,bs2; 1392 PetscTruth skipallocation = PETSC_FALSE,flg; 1393 1394 PetscFunctionBegin; 1395 B->preallocated = PETSC_TRUE; 1396 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1397 mbs = B->m/bs; 1398 bs2 = bs*bs; 1399 1400 if (mbs*bs != B->m) { 1401 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1402 } 1403 1404 if (nz == MAT_SKIP_ALLOCATION) { 1405 skipallocation = PETSC_TRUE; 1406 nz = 0; 1407 } 1408 1409 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1410 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 1411 if (nnz) { 1412 for (i=0; i<mbs; i++) { 1413 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 1414 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs); 1415 } 1416 } 1417 1418 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1419 if (!flg) { 1420 switch (bs) { 1421 case 1: 1422 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1423 B->ops->solve = MatSolve_SeqSBAIJ_1; 1424 B->ops->solves = MatSolves_SeqSBAIJ_1; 1425 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1426 B->ops->mult = MatMult_SeqSBAIJ_1; 1427 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1428 break; 1429 case 2: 1430 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1431 B->ops->solve = MatSolve_SeqSBAIJ_2; 1432 B->ops->solvetranspose = MatSolve_SeqSBAIJ_2; 1433 B->ops->mult = MatMult_SeqSBAIJ_2; 1434 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1435 break; 1436 case 3: 1437 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1438 B->ops->solve = MatSolve_SeqSBAIJ_3; 1439 B->ops->solvetranspose = MatSolve_SeqSBAIJ_3; 1440 B->ops->mult = MatMult_SeqSBAIJ_3; 1441 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1442 break; 1443 case 4: 1444 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1445 B->ops->solve = MatSolve_SeqSBAIJ_4; 1446 B->ops->solvetranspose = MatSolve_SeqSBAIJ_4; 1447 B->ops->mult = MatMult_SeqSBAIJ_4; 1448 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1449 break; 1450 case 5: 1451 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1452 B->ops->solve = MatSolve_SeqSBAIJ_5; 1453 B->ops->solvetranspose = MatSolve_SeqSBAIJ_5; 1454 B->ops->mult = MatMult_SeqSBAIJ_5; 1455 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1456 break; 1457 case 6: 1458 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1459 B->ops->solve = MatSolve_SeqSBAIJ_6; 1460 B->ops->solvetranspose = MatSolve_SeqSBAIJ_6; 1461 B->ops->mult = MatMult_SeqSBAIJ_6; 1462 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1463 break; 1464 case 7: 1465 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1466 B->ops->solve = MatSolve_SeqSBAIJ_7; 1467 B->ops->solvetranspose = MatSolve_SeqSBAIJ_7; 1468 B->ops->mult = MatMult_SeqSBAIJ_7; 1469 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1470 break; 1471 } 1472 } 1473 1474 b->mbs = mbs; 1475 b->nbs = mbs; 1476 if (!skipallocation) { 1477 /* b->ilen will count nonzeros in each block row so far. */ 1478 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 1479 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1480 if (!nnz) { 1481 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1482 else if (nz <= 0) nz = 1; 1483 for (i=0; i<mbs; i++) { 1484 b->imax[i] = nz; 1485 } 1486 nz = nz*mbs; /* total nz */ 1487 } else { 1488 nz = 0; 1489 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1490 } 1491 /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1492 1493 /* allocate the matrix space */ 1494 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->m+1,PetscInt,&b->i);CHKERRQ(ierr); 1495 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1496 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1497 b->singlemalloc = PETSC_TRUE; 1498 1499 /* pointer to beginning of each row */ 1500 b->i[0] = 0; 1501 for (i=1; i<mbs+1; i++) { 1502 b->i[i] = b->i[i-1] + b->imax[i-1]; 1503 } 1504 } 1505 1506 B->bs = bs; 1507 b->bs2 = bs2; 1508 b->nz = 0; 1509 b->maxnz = nz*bs2; 1510 1511 b->inew = 0; 1512 b->jnew = 0; 1513 b->anew = 0; 1514 b->a2anew = 0; 1515 b->permute = PETSC_FALSE; 1516 PetscFunctionReturn(0); 1517 } 1518 EXTERN_C_END 1519 1520 EXTERN_C_BEGIN 1521 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*); 1522 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqBAIJ(Mat,const MatType,MatReuse,Mat*); 1523 EXTERN_C_END 1524 1525 /*MC 1526 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1527 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1528 1529 Options Database Keys: 1530 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1531 1532 Level: beginner 1533 1534 .seealso: MatCreateSeqSBAIJ 1535 M*/ 1536 1537 EXTERN_C_BEGIN 1538 #undef __FUNCT__ 1539 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1540 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqSBAIJ(Mat B) 1541 { 1542 Mat_SeqSBAIJ *b; 1543 PetscErrorCode ierr; 1544 PetscMPIInt size; 1545 PetscTruth flg; 1546 1547 PetscFunctionBegin; 1548 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1549 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1550 B->m = B->M = PetscMax(B->m,B->M); 1551 B->n = B->N = PetscMax(B->n,B->N); 1552 1553 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1554 B->data = (void*)b; 1555 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1556 B->ops->destroy = MatDestroy_SeqSBAIJ; 1557 B->ops->view = MatView_SeqSBAIJ; 1558 B->factor = 0; 1559 B->mapping = 0; 1560 b->row = 0; 1561 b->icol = 0; 1562 b->reallocs = 0; 1563 b->saved_values = 0; 1564 1565 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1566 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1567 1568 b->sorted = PETSC_FALSE; 1569 b->roworiented = PETSC_TRUE; 1570 b->nonew = 0; 1571 b->diag = 0; 1572 b->solve_work = 0; 1573 b->mult_work = 0; 1574 B->spptr = 0; 1575 b->keepzeroedrows = PETSC_FALSE; 1576 b->xtoy = 0; 1577 b->XtoY = 0; 1578 1579 b->inew = 0; 1580 b->jnew = 0; 1581 b->anew = 0; 1582 b->a2anew = 0; 1583 b->permute = PETSC_FALSE; 1584 1585 b->ignore_ltriangular = PETSC_FALSE; 1586 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_ignore_lower_triangular",&flg);CHKERRQ(ierr); 1587 if (flg) b->ignore_ltriangular = PETSC_TRUE; 1588 1589 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1590 "MatStoreValues_SeqSBAIJ", 1591 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1592 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1593 "MatRetrieveValues_SeqSBAIJ", 1594 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1595 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1596 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1597 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1598 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C", 1599 "MatConvert_SeqSBAIJ_SeqAIJ", 1600 MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr); 1601 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C", 1602 "MatConvert_SeqSBAIJ_SeqBAIJ", 1603 MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr); 1604 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1605 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1606 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1607 1608 B->symmetric = PETSC_TRUE; 1609 B->structurally_symmetric = PETSC_TRUE; 1610 B->symmetric_set = PETSC_TRUE; 1611 B->structurally_symmetric_set = PETSC_TRUE; 1612 PetscFunctionReturn(0); 1613 } 1614 EXTERN_C_END 1615 1616 #undef __FUNCT__ 1617 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1618 /*@C 1619 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1620 compressed row) format. For good matrix assembly performance the 1621 user should preallocate the matrix storage by setting the parameter nz 1622 (or the array nnz). By setting these parameters accurately, performance 1623 during matrix assembly can be increased by more than a factor of 50. 1624 1625 Collective on Mat 1626 1627 Input Parameters: 1628 + A - the symmetric matrix 1629 . bs - size of block 1630 . nz - number of block nonzeros per block row (same for all rows) 1631 - nnz - array containing the number of block nonzeros in the upper triangular plus 1632 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1633 1634 Options Database Keys: 1635 . -mat_no_unroll - uses code that does not unroll the loops in the 1636 block calculations (much slower) 1637 . -mat_block_size - size of the blocks to use 1638 1639 Level: intermediate 1640 1641 Notes: 1642 Specify the preallocated storage with either nz or nnz (not both). 1643 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1644 allocation. For additional details, see the users manual chapter on 1645 matrices. 1646 1647 If the nnz parameter is given then the nz parameter is ignored 1648 1649 1650 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1651 @*/ 1652 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 1653 { 1654 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 1655 1656 PetscFunctionBegin; 1657 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1658 if (f) { 1659 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1660 } 1661 PetscFunctionReturn(0); 1662 } 1663 1664 #undef __FUNCT__ 1665 #define __FUNCT__ "MatCreateSeqSBAIJ" 1666 /*@C 1667 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1668 compressed row) format. For good matrix assembly performance the 1669 user should preallocate the matrix storage by setting the parameter nz 1670 (or the array nnz). By setting these parameters accurately, performance 1671 during matrix assembly can be increased by more than a factor of 50. 1672 1673 Collective on MPI_Comm 1674 1675 Input Parameters: 1676 + comm - MPI communicator, set to PETSC_COMM_SELF 1677 . bs - size of block 1678 . m - number of rows, or number of columns 1679 . nz - number of block nonzeros per block row (same for all rows) 1680 - nnz - array containing the number of block nonzeros in the upper triangular plus 1681 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1682 1683 Output Parameter: 1684 . A - the symmetric matrix 1685 1686 Options Database Keys: 1687 . -mat_no_unroll - uses code that does not unroll the loops in the 1688 block calculations (much slower) 1689 . -mat_block_size - size of the blocks to use 1690 1691 Level: intermediate 1692 1693 Notes: 1694 1695 Specify the preallocated storage with either nz or nnz (not both). 1696 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1697 allocation. For additional details, see the users manual chapter on 1698 matrices. 1699 1700 If the nnz parameter is given then the nz parameter is ignored 1701 1702 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1703 @*/ 1704 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1705 { 1706 PetscErrorCode ierr; 1707 1708 PetscFunctionBegin; 1709 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1710 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1711 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 1712 PetscFunctionReturn(0); 1713 } 1714 1715 #undef __FUNCT__ 1716 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1717 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1718 { 1719 Mat C; 1720 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1721 PetscErrorCode ierr; 1722 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1723 1724 PetscFunctionBegin; 1725 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1726 1727 *B = 0; 1728 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1729 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1730 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1731 c = (Mat_SeqSBAIJ*)C->data; 1732 1733 C->preallocated = PETSC_TRUE; 1734 C->factor = A->factor; 1735 c->row = 0; 1736 c->icol = 0; 1737 c->saved_values = 0; 1738 c->keepzeroedrows = a->keepzeroedrows; 1739 C->assembled = PETSC_TRUE; 1740 1741 C->M = A->M; 1742 C->N = A->N; 1743 C->bs = A->bs; 1744 c->bs2 = a->bs2; 1745 c->mbs = a->mbs; 1746 c->nbs = a->nbs; 1747 1748 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr); 1749 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr); 1750 for (i=0; i<mbs; i++) { 1751 c->imax[i] = a->imax[i]; 1752 c->ilen[i] = a->ilen[i]; 1753 } 1754 ierr = PetscLogObjectMemory(C,2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1755 1756 /* allocate the matrix space */ 1757 ierr = PetscMalloc3(bs2*nz,MatScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 1758 c->singlemalloc = PETSC_TRUE; 1759 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1760 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)));CHKERRQ(ierr); 1761 if (mbs > 0) { 1762 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1763 if (cpvalues == MAT_COPY_VALUES) { 1764 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1765 } else { 1766 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1767 } 1768 } 1769 1770 c->sorted = a->sorted; 1771 c->roworiented = a->roworiented; 1772 c->nonew = a->nonew; 1773 1774 if (a->diag) { 1775 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 1776 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1777 for (i=0; i<mbs; i++) { 1778 c->diag[i] = a->diag[i]; 1779 } 1780 } else c->diag = 0; 1781 c->nz = a->nz; 1782 c->maxnz = a->maxnz; 1783 c->solve_work = 0; 1784 c->mult_work = 0; 1785 *B = C; 1786 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1787 PetscFunctionReturn(0); 1788 } 1789 1790 #undef __FUNCT__ 1791 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1792 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer,const MatType type,Mat *A) 1793 { 1794 Mat_SeqSBAIJ *a; 1795 Mat B; 1796 PetscErrorCode ierr; 1797 int fd; 1798 PetscMPIInt size; 1799 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 1800 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1801 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 1802 PetscInt *masked,nmask,tmp,bs2,ishift; 1803 PetscScalar *aa; 1804 MPI_Comm comm = ((PetscObject)viewer)->comm; 1805 1806 PetscFunctionBegin; 1807 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1808 bs2 = bs*bs; 1809 1810 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1811 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1812 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1813 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1814 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1815 M = header[1]; N = header[2]; nz = header[3]; 1816 1817 if (header[3] < 0) { 1818 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1819 } 1820 1821 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1822 1823 /* 1824 This code adds extra rows to make sure the number of rows is 1825 divisible by the blocksize 1826 */ 1827 mbs = M/bs; 1828 extra_rows = bs - M + bs*(mbs); 1829 if (extra_rows == bs) extra_rows = 0; 1830 else mbs++; 1831 if (extra_rows) { 1832 ierr = PetscLogInfo((0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr); 1833 } 1834 1835 /* read in row lengths */ 1836 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1837 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1838 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1839 1840 /* read in column indices */ 1841 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1842 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1843 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1844 1845 /* loop over row lengths determining block row lengths */ 1846 ierr = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr); 1847 ierr = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1848 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 1849 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1850 masked = mask + mbs; 1851 rowcount = 0; nzcount = 0; 1852 for (i=0; i<mbs; i++) { 1853 nmask = 0; 1854 for (j=0; j<bs; j++) { 1855 kmax = rowlengths[rowcount]; 1856 for (k=0; k<kmax; k++) { 1857 tmp = jj[nzcount++]/bs; /* block col. index */ 1858 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1859 } 1860 rowcount++; 1861 } 1862 s_browlengths[i] += nmask; 1863 1864 /* zero out the mask elements we set */ 1865 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1866 } 1867 1868 /* create our matrix */ 1869 ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr); 1870 ierr = MatSetType(B,type);CHKERRQ(ierr); 1871 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,s_browlengths);CHKERRQ(ierr); 1872 a = (Mat_SeqSBAIJ*)B->data; 1873 1874 /* set matrix "i" values */ 1875 a->i[0] = 0; 1876 for (i=1; i<= mbs; i++) { 1877 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1878 a->ilen[i-1] = s_browlengths[i-1]; 1879 } 1880 a->nz = a->i[mbs]; 1881 1882 /* read in nonzero values */ 1883 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1884 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1885 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1886 1887 /* set "a" and "j" values into matrix */ 1888 nzcount = 0; jcount = 0; 1889 for (i=0; i<mbs; i++) { 1890 nzcountb = nzcount; 1891 nmask = 0; 1892 for (j=0; j<bs; j++) { 1893 kmax = rowlengths[i*bs+j]; 1894 for (k=0; k<kmax; k++) { 1895 tmp = jj[nzcount++]/bs; /* block col. index */ 1896 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1897 } 1898 } 1899 /* sort the masked values */ 1900 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1901 1902 /* set "j" values into matrix */ 1903 maskcount = 1; 1904 for (j=0; j<nmask; j++) { 1905 a->j[jcount++] = masked[j]; 1906 mask[masked[j]] = maskcount++; 1907 } 1908 1909 /* set "a" values into matrix */ 1910 ishift = bs2*a->i[i]; 1911 for (j=0; j<bs; j++) { 1912 kmax = rowlengths[i*bs+j]; 1913 for (k=0; k<kmax; k++) { 1914 tmp = jj[nzcountb]/bs ; /* block col. index */ 1915 if (tmp >= i){ 1916 block = mask[tmp] - 1; 1917 point = jj[nzcountb] - bs*tmp; 1918 idx = ishift + bs2*block + j + bs*point; 1919 a->a[idx] = aa[nzcountb]; 1920 } 1921 nzcountb++; 1922 } 1923 } 1924 /* zero out the mask elements we set */ 1925 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1926 } 1927 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1928 1929 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1930 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1931 ierr = PetscFree(aa);CHKERRQ(ierr); 1932 ierr = PetscFree(jj);CHKERRQ(ierr); 1933 ierr = PetscFree(mask);CHKERRQ(ierr); 1934 1935 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1936 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1937 ierr = MatView_Private(B);CHKERRQ(ierr); 1938 *A = B; 1939 PetscFunctionReturn(0); 1940 } 1941 1942 #undef __FUNCT__ 1943 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1944 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1945 { 1946 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1947 MatScalar *aa=a->a,*v,*v1; 1948 PetscScalar *x,*b,*t,sum,d; 1949 PetscErrorCode ierr; 1950 PetscInt m=a->mbs,bs=A->bs,*ai=a->i,*aj=a->j; 1951 PetscInt nz,nz1,*vj,*vj1,i; 1952 1953 PetscFunctionBegin; 1954 its = its*lits; 1955 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1956 1957 if (bs > 1) 1958 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1959 1960 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1961 if (xx != bb) { 1962 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1963 } else { 1964 b = x; 1965 } 1966 1967 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1968 1969 if (flag & SOR_ZERO_INITIAL_GUESS) { 1970 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1971 for (i=0; i<m; i++) 1972 t[i] = b[i]; 1973 1974 for (i=0; i<m; i++){ 1975 d = *(aa + ai[i]); /* diag[i] */ 1976 v = aa + ai[i] + 1; 1977 vj = aj + ai[i] + 1; 1978 nz = ai[i+1] - ai[i] - 1; 1979 x[i] = omega*t[i]/d; 1980 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 1981 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 1982 } 1983 } 1984 1985 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1986 for (i=0; i<m; i++) 1987 t[i] = b[i]; 1988 1989 for (i=0; i<m-1; i++){ /* update rhs */ 1990 v = aa + ai[i] + 1; 1991 vj = aj + ai[i] + 1; 1992 nz = ai[i+1] - ai[i] - 1; 1993 while (nz--) t[*vj++] -= x[i]*(*v++); 1994 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 1995 } 1996 for (i=m-1; i>=0; i--){ 1997 d = *(aa + ai[i]); 1998 v = aa + ai[i] + 1; 1999 vj = aj + ai[i] + 1; 2000 nz = ai[i+1] - ai[i] - 1; 2001 sum = t[i]; 2002 while (nz--) sum -= x[*vj++]*(*v++); 2003 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2004 x[i] = (1-omega)*x[i] + omega*sum/d; 2005 } 2006 } 2007 its--; 2008 } 2009 2010 while (its--) { 2011 /* 2012 forward sweep: 2013 for i=0,...,m-1: 2014 sum[i] = (b[i] - U(i,:)x )/d[i]; 2015 x[i] = (1-omega)x[i] + omega*sum[i]; 2016 b = b - x[i]*U^T(i,:); 2017 2018 */ 2019 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2020 for (i=0; i<m; i++) 2021 t[i] = b[i]; 2022 2023 for (i=0; i<m; i++){ 2024 d = *(aa + ai[i]); /* diag[i] */ 2025 v = aa + ai[i] + 1; v1=v; 2026 vj = aj + ai[i] + 1; vj1=vj; 2027 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2028 sum = t[i]; 2029 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2030 x[i] = (1-omega)*x[i] + omega*sum/d; 2031 while (nz--) t[*vj++] -= x[i]*(*v++); 2032 ierr = PetscLogFlops(4*nz-2);CHKERRQ(ierr); 2033 } 2034 } 2035 2036 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2037 /* 2038 backward sweep: 2039 b = b - x[i]*U^T(i,:), i=0,...,n-2 2040 for i=m-1,...,0: 2041 sum[i] = (b[i] - U(i,:)x )/d[i]; 2042 x[i] = (1-omega)x[i] + omega*sum[i]; 2043 */ 2044 for (i=0; i<m; i++) 2045 t[i] = b[i]; 2046 2047 for (i=0; i<m-1; i++){ /* update rhs */ 2048 v = aa + ai[i] + 1; 2049 vj = aj + ai[i] + 1; 2050 nz = ai[i+1] - ai[i] - 1; 2051 while (nz--) t[*vj++] -= x[i]*(*v++); 2052 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2053 } 2054 for (i=m-1; i>=0; i--){ 2055 d = *(aa + ai[i]); 2056 v = aa + ai[i] + 1; 2057 vj = aj + ai[i] + 1; 2058 nz = ai[i+1] - ai[i] - 1; 2059 sum = t[i]; 2060 while (nz--) sum -= x[*vj++]*(*v++); 2061 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2062 x[i] = (1-omega)*x[i] + omega*sum/d; 2063 } 2064 } 2065 } 2066 2067 ierr = PetscFree(t);CHKERRQ(ierr); 2068 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2069 if (bb != xx) { 2070 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2071 } 2072 PetscFunctionReturn(0); 2073 } 2074 2075 2076 2077 2078 2079 2080