1 2 /* 3 Defines the basic matrix operations for the SBAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 7 #include "src/inline/spops.h" 8 #include "src/mat/impls/sbaij/seq/sbaij.h" 9 10 #define CHUNKSIZE 10 11 12 /* 13 Checks for missing diagonals 14 */ 15 #undef __FUNCT__ 16 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 17 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A) 18 { 19 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 20 PetscErrorCode ierr; 21 PetscInt *diag,*jj = a->j,i; 22 23 PetscFunctionBegin; 24 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 25 diag = a->diag; 26 for (i=0; i<a->mbs; i++) { 27 if (jj[diag[i]] != i) SETERRQ1(PETSC_ERR_ARG_CORRUPT,"Matrix is missing diagonal number %D",i); 28 } 29 PetscFunctionReturn(0); 30 } 31 32 #undef __FUNCT__ 33 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 34 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A) 35 { 36 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 37 PetscErrorCode ierr; 38 PetscInt i,mbs = a->mbs; 39 40 PetscFunctionBegin; 41 if (a->diag) PetscFunctionReturn(0); 42 43 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 44 ierr = PetscLogObjectMemory(A,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 45 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 46 PetscFunctionReturn(0); 47 } 48 49 #undef __FUNCT__ 50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 51 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 52 { 53 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 54 PetscInt n = a->mbs,i; 55 56 PetscFunctionBegin; 57 *nn = n; 58 if (!ia) PetscFunctionReturn(0); 59 60 if (oshift == 1) { 61 /* temporarily add 1 to i and j indices */ 62 PetscInt nz = a->i[n]; 63 for (i=0; i<nz; i++) a->j[i]++; 64 for (i=0; i<n+1; i++) a->i[i]++; 65 *ia = a->i; *ja = a->j; 66 } else { 67 *ia = a->i; *ja = a->j; 68 } 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 74 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 75 { 76 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 77 PetscInt i,n = a->mbs; 78 79 PetscFunctionBegin; 80 if (!ia) PetscFunctionReturn(0); 81 82 if (oshift == 1) { 83 PetscInt nz = a->i[n]-1; 84 for (i=0; i<nz; i++) a->j[i]--; 85 for (i=0; i<n+1; i++) a->i[i]--; 86 } 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 92 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A) 93 { 94 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 95 PetscErrorCode ierr; 96 97 PetscFunctionBegin; 98 PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->m,a->nz); 99 ierr = PetscFree(a->a);CHKERRQ(ierr); 100 if (!a->singlemalloc) { 101 ierr = PetscFree(a->i);CHKERRQ(ierr); 102 ierr = PetscFree(a->j);CHKERRQ(ierr); 103 } 104 if (a->row) { 105 ierr = ISDestroy(a->row);CHKERRQ(ierr); 106 } 107 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 108 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 109 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 110 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 111 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 112 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 113 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 114 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 115 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 116 117 if (a->inew){ 118 ierr = PetscFree(a->inew);CHKERRQ(ierr); 119 a->inew = 0; 120 } 121 ierr = PetscFree(a);CHKERRQ(ierr); 122 123 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 124 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 125 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 126 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 127 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr); 128 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 129 PetscFunctionReturn(0); 130 } 131 132 #undef __FUNCT__ 133 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 134 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op) 135 { 136 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 137 138 PetscFunctionBegin; 139 switch (op) { 140 case MAT_ROW_ORIENTED: 141 a->roworiented = PETSC_TRUE; 142 break; 143 case MAT_COLUMN_ORIENTED: 144 a->roworiented = PETSC_FALSE; 145 break; 146 case MAT_COLUMNS_SORTED: 147 a->sorted = PETSC_TRUE; 148 break; 149 case MAT_COLUMNS_UNSORTED: 150 a->sorted = PETSC_FALSE; 151 break; 152 case MAT_KEEP_ZEROED_ROWS: 153 a->keepzeroedrows = PETSC_TRUE; 154 break; 155 case MAT_NO_NEW_NONZERO_LOCATIONS: 156 a->nonew = 1; 157 break; 158 case MAT_NEW_NONZERO_LOCATION_ERR: 159 a->nonew = -1; 160 break; 161 case MAT_NEW_NONZERO_ALLOCATION_ERR: 162 a->nonew = -2; 163 break; 164 case MAT_YES_NEW_NONZERO_LOCATIONS: 165 a->nonew = 0; 166 break; 167 case MAT_ROWS_SORTED: 168 case MAT_ROWS_UNSORTED: 169 case MAT_YES_NEW_DIAGONALS: 170 case MAT_IGNORE_OFF_PROC_ENTRIES: 171 case MAT_USE_HASH_TABLE: 172 PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n"); 173 break; 174 case MAT_NO_NEW_DIAGONALS: 175 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 176 case MAT_NOT_SYMMETRIC: 177 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 178 case MAT_HERMITIAN: 179 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 180 case MAT_SYMMETRIC: 181 case MAT_STRUCTURALLY_SYMMETRIC: 182 case MAT_NOT_HERMITIAN: 183 case MAT_SYMMETRY_ETERNAL: 184 case MAT_NOT_SYMMETRY_ETERNAL: 185 case MAT_IGNORE_LOWER_TRIANGULAR: 186 a->ignore_ltriangular = PETSC_TRUE; 187 break; 188 case MAT_ERROR_LOWER_TRIANGULAR: 189 a->ignore_ltriangular = PETSC_FALSE; 190 break; 191 default: 192 SETERRQ(PETSC_ERR_SUP,"unknown option"); 193 } 194 PetscFunctionReturn(0); 195 } 196 197 #undef __FUNCT__ 198 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 199 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v) 200 { 201 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 202 PetscErrorCode ierr; 203 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2; 204 MatScalar *aa,*aa_i; 205 PetscScalar *v_i; 206 207 PetscFunctionBegin; 208 bs = A->bs; 209 ai = a->i; 210 aj = a->j; 211 aa = a->a; 212 bs2 = a->bs2; 213 214 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row); 215 216 bn = row/bs; /* Block number */ 217 bp = row % bs; /* Block position */ 218 M = ai[bn+1] - ai[bn]; 219 *ncols = bs*M; 220 221 if (v) { 222 *v = 0; 223 if (*ncols) { 224 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 225 for (i=0; i<M; i++) { /* for each block in the block row */ 226 v_i = *v + i*bs; 227 aa_i = aa + bs2*(ai[bn] + i); 228 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 229 } 230 } 231 } 232 233 if (cols) { 234 *cols = 0; 235 if (*ncols) { 236 ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr); 237 for (i=0; i<M; i++) { /* for each block in the block row */ 238 cols_i = *cols + i*bs; 239 itmp = bs*aj[ai[bn] + i]; 240 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 241 } 242 } 243 } 244 245 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 246 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 247 #ifdef column_search 248 v_i = *v + M*bs; 249 cols_i = *cols + M*bs; 250 for (i=0; i<bn; i++){ /* for each block row */ 251 M = ai[i+1] - ai[i]; 252 for (j=0; j<M; j++){ 253 itmp = aj[ai[i] + j]; /* block column value */ 254 if (itmp == bn){ 255 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 256 for (k=0; k<bs; k++) { 257 *cols_i++ = i*bs+k; 258 *v_i++ = aa_i[k]; 259 } 260 *ncols += bs; 261 break; 262 } 263 } 264 } 265 #endif 266 PetscFunctionReturn(0); 267 } 268 269 #undef __FUNCT__ 270 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 271 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 272 { 273 PetscErrorCode ierr; 274 275 PetscFunctionBegin; 276 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 277 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 278 PetscFunctionReturn(0); 279 } 280 281 #undef __FUNCT__ 282 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 283 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B) 284 { 285 PetscErrorCode ierr; 286 PetscFunctionBegin; 287 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 288 PetscFunctionReturn(0); 289 } 290 291 #undef __FUNCT__ 292 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 293 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 294 { 295 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 296 PetscErrorCode ierr; 297 PetscInt i,j,bs = A->bs,k,l,bs2=a->bs2; 298 char *name; 299 PetscViewerFormat format; 300 301 PetscFunctionBegin; 302 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 303 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 304 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 305 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 306 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 307 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 308 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 309 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 310 for (i=0; i<a->mbs; i++) { 311 for (j=0; j<bs; j++) { 312 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 313 for (k=a->i[i]; k<a->i[i+1]; k++) { 314 for (l=0; l<bs; l++) { 315 #if defined(PETSC_USE_COMPLEX) 316 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 317 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 318 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 319 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 320 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 321 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 322 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 323 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 324 } 325 #else 326 if (a->a[bs2*k + l*bs + j] != 0.0) { 327 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 328 } 329 #endif 330 } 331 } 332 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 333 } 334 } 335 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 336 } else { 337 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 338 for (i=0; i<a->mbs; i++) { 339 for (j=0; j<bs; j++) { 340 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 341 for (k=a->i[i]; k<a->i[i+1]; k++) { 342 for (l=0; l<bs; l++) { 343 #if defined(PETSC_USE_COMPLEX) 344 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 345 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 346 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 347 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 348 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 349 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 350 } else { 351 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 352 } 353 #else 354 ierr = PetscViewerASCIIPrintf(viewer," %D %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 355 #endif 356 } 357 } 358 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 359 } 360 } 361 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 362 } 363 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 364 PetscFunctionReturn(0); 365 } 366 367 #undef __FUNCT__ 368 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 369 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 370 { 371 Mat A = (Mat) Aa; 372 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 373 PetscErrorCode ierr; 374 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2; 375 PetscMPIInt rank; 376 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 377 MatScalar *aa; 378 MPI_Comm comm; 379 PetscViewer viewer; 380 381 PetscFunctionBegin; 382 /* 383 This is nasty. If this is called from an originally parallel matrix 384 then all processes call this,but only the first has the matrix so the 385 rest should return immediately. 386 */ 387 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 388 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 389 if (rank) PetscFunctionReturn(0); 390 391 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 392 393 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 394 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 395 396 /* loop over matrix elements drawing boxes */ 397 color = PETSC_DRAW_BLUE; 398 for (i=0,row=0; i<mbs; i++,row+=bs) { 399 for (j=a->i[i]; j<a->i[i+1]; j++) { 400 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 401 x_l = a->j[j]*bs; x_r = x_l + 1.0; 402 aa = a->a + j*bs2; 403 for (k=0; k<bs; k++) { 404 for (l=0; l<bs; l++) { 405 if (PetscRealPart(*aa++) >= 0.) continue; 406 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 407 } 408 } 409 } 410 } 411 color = PETSC_DRAW_CYAN; 412 for (i=0,row=0; i<mbs; i++,row+=bs) { 413 for (j=a->i[i]; j<a->i[i+1]; j++) { 414 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 415 x_l = a->j[j]*bs; x_r = x_l + 1.0; 416 aa = a->a + j*bs2; 417 for (k=0; k<bs; k++) { 418 for (l=0; l<bs; l++) { 419 if (PetscRealPart(*aa++) != 0.) continue; 420 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 421 } 422 } 423 } 424 } 425 426 color = PETSC_DRAW_RED; 427 for (i=0,row=0; i<mbs; i++,row+=bs) { 428 for (j=a->i[i]; j<a->i[i+1]; j++) { 429 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 430 x_l = a->j[j]*bs; x_r = x_l + 1.0; 431 aa = a->a + j*bs2; 432 for (k=0; k<bs; k++) { 433 for (l=0; l<bs; l++) { 434 if (PetscRealPart(*aa++) <= 0.) continue; 435 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 436 } 437 } 438 } 439 } 440 PetscFunctionReturn(0); 441 } 442 443 #undef __FUNCT__ 444 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 445 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 446 { 447 PetscErrorCode ierr; 448 PetscReal xl,yl,xr,yr,w,h; 449 PetscDraw draw; 450 PetscTruth isnull; 451 452 PetscFunctionBegin; 453 454 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 455 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 456 457 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 458 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 459 xr += w; yr += h; xl = -w; yl = -h; 460 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 461 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 462 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 463 PetscFunctionReturn(0); 464 } 465 466 #undef __FUNCT__ 467 #define __FUNCT__ "MatView_SeqSBAIJ" 468 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 469 { 470 PetscErrorCode ierr; 471 PetscTruth iascii,isdraw; 472 473 PetscFunctionBegin; 474 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 475 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 476 if (iascii){ 477 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 478 } else if (isdraw) { 479 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 480 } else { 481 Mat B; 482 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 483 ierr = MatView(B,viewer);CHKERRQ(ierr); 484 ierr = MatDestroy(B);CHKERRQ(ierr); 485 } 486 PetscFunctionReturn(0); 487 } 488 489 490 #undef __FUNCT__ 491 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 492 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 493 { 494 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 495 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 496 PetscInt *ai = a->i,*ailen = a->ilen; 497 PetscInt brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2; 498 MatScalar *ap,*aa = a->a,zero = 0.0; 499 500 PetscFunctionBegin; 501 for (k=0; k<m; k++) { /* loop over rows */ 502 row = im[k]; brow = row/bs; 503 if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row); 504 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 505 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 506 nrow = ailen[brow]; 507 for (l=0; l<n; l++) { /* loop over columns */ 508 if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]); 509 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1); 510 col = in[l] ; 511 bcol = col/bs; 512 cidx = col%bs; 513 ridx = row%bs; 514 high = nrow; 515 low = 0; /* assume unsorted */ 516 while (high-low > 5) { 517 t = (low+high)/2; 518 if (rp[t] > bcol) high = t; 519 else low = t; 520 } 521 for (i=low; i<high; i++) { 522 if (rp[i] > bcol) break; 523 if (rp[i] == bcol) { 524 *v++ = ap[bs2*i+bs*cidx+ridx]; 525 goto finished; 526 } 527 } 528 *v++ = zero; 529 finished:; 530 } 531 } 532 PetscFunctionReturn(0); 533 } 534 535 536 #undef __FUNCT__ 537 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 538 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 539 { 540 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 541 PetscErrorCode ierr; 542 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 543 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 544 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval; 545 PetscTruth roworiented=a->roworiented; 546 const MatScalar *value = v; 547 MatScalar *ap,*aa = a->a,*bap; 548 549 PetscFunctionBegin; 550 if (roworiented) { 551 stepval = (n-1)*bs; 552 } else { 553 stepval = (m-1)*bs; 554 } 555 for (k=0; k<m; k++) { /* loop over added rows */ 556 row = im[k]; 557 if (row < 0) continue; 558 #if defined(PETSC_USE_DEBUG) 559 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 560 #endif 561 rp = aj + ai[row]; 562 ap = aa + bs2*ai[row]; 563 rmax = imax[row]; 564 nrow = ailen[row]; 565 low = 0; 566 high = nrow; 567 for (l=0; l<n; l++) { /* loop over added columns */ 568 if (in[l] < 0) continue; 569 col = in[l]; 570 #if defined(PETSC_USE_DEBUG) 571 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1); 572 #endif 573 if (col < row) continue; /* ignore lower triangular block */ 574 if (roworiented) { 575 value = v + k*(stepval+bs)*bs + l*bs; 576 } else { 577 value = v + l*(stepval+bs)*bs + k*bs; 578 } 579 if (col < lastcol) low = 0; else high = nrow; 580 lastcol = col; 581 while (high-low > 7) { 582 t = (low+high)/2; 583 if (rp[t] > col) high = t; 584 else low = t; 585 } 586 for (i=low; i<high; i++) { 587 if (rp[i] > col) break; 588 if (rp[i] == col) { 589 bap = ap + bs2*i; 590 if (roworiented) { 591 if (is == ADD_VALUES) { 592 for (ii=0; ii<bs; ii++,value+=stepval) { 593 for (jj=ii; jj<bs2; jj+=bs) { 594 bap[jj] += *value++; 595 } 596 } 597 } else { 598 for (ii=0; ii<bs; ii++,value+=stepval) { 599 for (jj=ii; jj<bs2; jj+=bs) { 600 bap[jj] = *value++; 601 } 602 } 603 } 604 } else { 605 if (is == ADD_VALUES) { 606 for (ii=0; ii<bs; ii++,value+=stepval) { 607 for (jj=0; jj<bs; jj++) { 608 *bap++ += *value++; 609 } 610 } 611 } else { 612 for (ii=0; ii<bs; ii++,value+=stepval) { 613 for (jj=0; jj<bs; jj++) { 614 *bap++ = *value++; 615 } 616 } 617 } 618 } 619 goto noinsert2; 620 } 621 } 622 if (nonew == 1) goto noinsert2; 623 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 624 if (nrow >= rmax) { 625 /* there is no extra room in row, therefore enlarge */ 626 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 627 MatScalar *new_a; 628 629 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 630 631 /* malloc new storage space */ 632 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); 633 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 634 new_j = (PetscInt*)(new_a + bs2*new_nz); 635 new_i = new_j + new_nz; 636 637 /* copy over old data into new slots */ 638 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 639 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 640 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 641 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 642 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 643 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 644 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 645 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 646 /* free up old matrix storage */ 647 ierr = PetscFree(a->a);CHKERRQ(ierr); 648 if (!a->singlemalloc) { 649 ierr = PetscFree(a->i);CHKERRQ(ierr); 650 ierr = PetscFree(a->j);CHKERRQ(ierr); 651 } 652 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 653 a->singlemalloc = PETSC_TRUE; 654 655 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 656 rmax = imax[row] = imax[row] + CHUNKSIZE; 657 ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); 658 a->maxnz += bs2*CHUNKSIZE; 659 a->reallocs++; 660 a->nz++; 661 } 662 N = nrow++ - 1; 663 /* shift up all the later entries in this row */ 664 for (ii=N; ii>=i; ii--) { 665 rp[ii+1] = rp[ii]; 666 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 667 } 668 if (N >= i) { 669 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 670 } 671 rp[i] = col; 672 bap = ap + bs2*i; 673 if (roworiented) { 674 for (ii=0; ii<bs; ii++,value+=stepval) { 675 for (jj=ii; jj<bs2; jj+=bs) { 676 bap[jj] = *value++; 677 } 678 } 679 } else { 680 for (ii=0; ii<bs; ii++,value+=stepval) { 681 for (jj=0; jj<bs; jj++) { 682 *bap++ = *value++; 683 } 684 } 685 } 686 noinsert2:; 687 low = i; 688 } 689 ailen[row] = nrow; 690 } 691 PetscFunctionReturn(0); 692 } 693 694 #undef __FUNCT__ 695 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 696 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 697 { 698 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 699 PetscErrorCode ierr; 700 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 701 PetscInt m = A->m,*ip,N,*ailen = a->ilen; 702 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 703 MatScalar *aa = a->a,*ap; 704 705 PetscFunctionBegin; 706 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 707 708 if (m) rmax = ailen[0]; 709 for (i=1; i<mbs; i++) { 710 /* move each row back by the amount of empty slots (fshift) before it*/ 711 fshift += imax[i-1] - ailen[i-1]; 712 rmax = PetscMax(rmax,ailen[i]); 713 if (fshift) { 714 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 715 N = ailen[i]; 716 for (j=0; j<N; j++) { 717 ip[j-fshift] = ip[j]; 718 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 719 } 720 } 721 ai[i] = ai[i-1] + ailen[i-1]; 722 } 723 if (mbs) { 724 fshift += imax[mbs-1] - ailen[mbs-1]; 725 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 726 } 727 /* reset ilen and imax for each row */ 728 for (i=0; i<mbs; i++) { 729 ailen[i] = imax[i] = ai[i+1] - ai[i]; 730 } 731 a->nz = ai[mbs]; 732 733 /* diagonals may have moved, reset it */ 734 if (a->diag) { 735 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 736 } 737 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n", 738 m,A->m,A->bs,fshift*bs2,a->nz*bs2); 739 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %D\n", 740 a->reallocs); 741 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %D\n",rmax); 742 a->reallocs = 0; 743 A->info.nz_unneeded = (PetscReal)fshift*bs2; 744 PetscFunctionReturn(0); 745 } 746 747 /* 748 This function returns an array of flags which indicate the locations of contiguous 749 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 750 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 751 Assume: sizes should be long enough to hold all the values. 752 */ 753 #undef __FUNCT__ 754 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 755 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 756 { 757 PetscInt i,j,k,row; 758 PetscTruth flg; 759 760 PetscFunctionBegin; 761 for (i=0,j=0; i<n; j++) { 762 row = idx[i]; 763 if (row%bs!=0) { /* Not the begining of a block */ 764 sizes[j] = 1; 765 i++; 766 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 767 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 768 i++; 769 } else { /* Begining of the block, so check if the complete block exists */ 770 flg = PETSC_TRUE; 771 for (k=1; k<bs; k++) { 772 if (row+k != idx[i+k]) { /* break in the block */ 773 flg = PETSC_FALSE; 774 break; 775 } 776 } 777 if (flg) { /* No break in the bs */ 778 sizes[j] = bs; 779 i+= bs; 780 } else { 781 sizes[j] = 1; 782 i++; 783 } 784 } 785 } 786 *bs_max = j; 787 PetscFunctionReturn(0); 788 } 789 790 791 /* Only add/insert a(i,j) with i<=j (blocks). 792 Any a(i,j) with i>j input by user is ingored. 793 */ 794 795 #undef __FUNCT__ 796 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 797 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 798 { 799 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 800 PetscErrorCode ierr; 801 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 802 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 803 PetscInt *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol; 804 PetscInt ridx,cidx,bs2=a->bs2; 805 MatScalar *ap,value,*aa=a->a,*bap; 806 807 PetscFunctionBegin; 808 for (k=0; k<m; k++) { /* loop over added rows */ 809 row = im[k]; /* row number */ 810 brow = row/bs; /* block row number */ 811 if (row < 0) continue; 812 #if defined(PETSC_USE_DEBUG) 813 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 814 #endif 815 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 816 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 817 rmax = imax[brow]; /* maximum space allocated for this row */ 818 nrow = ailen[brow]; /* actual length of this row */ 819 low = 0; 820 821 for (l=0; l<n; l++) { /* loop over added columns */ 822 if (in[l] < 0) continue; 823 #if defined(PETSC_USE_DEBUG) 824 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->m-1); 825 #endif 826 col = in[l]; 827 bcol = col/bs; /* block col number */ 828 829 if (brow > bcol) { 830 if (a->ignore_ltriangular){ 831 continue; /* ignore lower triangular values */ 832 } else { 833 SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR)"); 834 } 835 } 836 837 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 838 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 839 /* element value a(k,l) */ 840 if (roworiented) { 841 value = v[l + k*n]; 842 } else { 843 value = v[k + l*m]; 844 } 845 846 /* move pointer bap to a(k,l) quickly and add/insert value */ 847 if (col < lastcol) low = 0; high = nrow; 848 lastcol = col; 849 while (high-low > 7) { 850 t = (low+high)/2; 851 if (rp[t] > bcol) high = t; 852 else low = t; 853 } 854 for (i=low; i<high; i++) { 855 if (rp[i] > bcol) break; 856 if (rp[i] == bcol) { 857 bap = ap + bs2*i + bs*cidx + ridx; 858 if (is == ADD_VALUES) *bap += value; 859 else *bap = value; 860 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 861 if (brow == bcol && ridx < cidx){ 862 bap = ap + bs2*i + bs*ridx + cidx; 863 if (is == ADD_VALUES) *bap += value; 864 else *bap = value; 865 } 866 goto noinsert1; 867 } 868 } 869 870 if (nonew == 1) goto noinsert1; 871 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 872 if (nrow >= rmax) { 873 /* there is no extra room in row, therefore enlarge */ 874 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 875 MatScalar *new_a; 876 877 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 878 879 /* Malloc new storage space */ 880 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); 881 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 882 new_j = (PetscInt*)(new_a + bs2*new_nz); 883 new_i = new_j + new_nz; 884 885 /* copy over old data into new slots */ 886 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 887 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 888 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 889 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 890 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 891 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 892 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 893 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 894 /* free up old matrix storage */ 895 ierr = PetscFree(a->a);CHKERRQ(ierr); 896 if (!a->singlemalloc) { 897 ierr = PetscFree(a->i);CHKERRQ(ierr); 898 ierr = PetscFree(a->j);CHKERRQ(ierr); 899 } 900 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 901 a->singlemalloc = PETSC_TRUE; 902 903 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 904 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 905 ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); 906 a->maxnz += bs2*CHUNKSIZE; 907 a->reallocs++; 908 a->nz++; 909 } 910 911 N = nrow++ - 1; 912 /* shift up all the later entries in this row */ 913 for (ii=N; ii>=i; ii--) { 914 rp[ii+1] = rp[ii]; 915 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 916 } 917 if (N>=i) { 918 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 919 } 920 rp[i] = bcol; 921 ap[bs2*i + bs*cidx + ridx] = value; 922 noinsert1:; 923 low = i; 924 } 925 } /* end of loop over added columns */ 926 ailen[brow] = nrow; 927 } /* end of loop over added rows */ 928 PetscFunctionReturn(0); 929 } 930 931 EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*); 932 EXTERN PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*); 933 EXTERN PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat,PetscInt,IS[],PetscInt); 934 EXTERN PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,PetscInt,MatReuse,Mat*); 935 EXTERN PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat*[]); 936 EXTERN PetscErrorCode MatScale_SeqSBAIJ(const PetscScalar*,Mat); 937 EXTERN PetscErrorCode MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 938 EXTERN PetscErrorCode MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 939 EXTERN PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat,Vec); 940 EXTERN PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 941 EXTERN PetscErrorCode MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 942 EXTERN PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat); 943 EXTERN PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat,Vec); 944 945 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 946 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 947 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 948 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 949 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 950 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 951 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 952 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 953 954 EXTERN PetscErrorCode MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs); 955 956 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 957 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 958 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 959 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 960 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 961 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 962 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 963 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 964 965 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,MatFactorInfo*,Mat*); 966 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,MatFactorInfo*,Mat*); 967 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,MatFactorInfo*,Mat*); 968 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,MatFactorInfo*,Mat*); 969 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,MatFactorInfo*,Mat*); 970 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,MatFactorInfo*,Mat*); 971 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,MatFactorInfo*,Mat*); 972 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,MatFactorInfo*,Mat*); 973 EXTERN PetscErrorCode MatGetInertia_SeqSBAIJ(Mat,PetscInt*,PetscInt*,PetscInt*); 974 975 EXTERN PetscErrorCode MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 976 EXTERN PetscErrorCode MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 977 EXTERN PetscErrorCode MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 978 EXTERN PetscErrorCode MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 979 EXTERN PetscErrorCode MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 980 EXTERN PetscErrorCode MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 981 EXTERN PetscErrorCode MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 982 EXTERN PetscErrorCode MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 983 984 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 985 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 986 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 987 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 988 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 989 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 990 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 991 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 992 993 #ifdef HAVE_MatICCFactor 994 /* modified from MatILUFactor_SeqSBAIJ, needs further work! */ 995 #undef __FUNCT__ 996 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 997 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 998 { 999 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 1000 Mat outA; 1001 PetscErrorCode ierr; 1002 PetscTruth row_identity,col_identity; 1003 1004 PetscFunctionBegin; 1005 outA = inA; 1006 inA->factor = FACTOR_CHOLESKY; 1007 1008 if (!a->diag) { 1009 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 1010 } 1011 /* 1012 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1013 for ILU(0) factorization with natural ordering 1014 */ 1015 switch (a->bs) { 1016 case 1: 1017 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1018 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1019 inA->ops->solves = MatSolves_SeqSBAIJ_1; 1020 PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1021 case 2: 1022 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1023 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1024 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1025 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1026 break; 1027 case 3: 1028 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1029 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1030 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1031 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1032 break; 1033 case 4: 1034 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1035 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1036 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1037 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1038 break; 1039 case 5: 1040 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1041 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1042 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1043 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1044 break; 1045 case 6: 1046 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1047 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1048 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1049 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1050 break; 1051 case 7: 1052 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1053 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1054 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1055 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1056 break; 1057 default: 1058 a->row = row; 1059 a->icol = col; 1060 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1061 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1062 1063 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1064 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1065 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 1066 1067 if (!a->solve_work) { 1068 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1069 ierr = PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1070 } 1071 } 1072 1073 ierr = MatCholeskyFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1074 PetscFunctionReturn(0); 1075 } 1076 #endif 1077 1078 #undef __FUNCT__ 1079 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1080 PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat A) 1081 { 1082 static PetscTruth called = PETSC_FALSE; 1083 MPI_Comm comm = A->comm; 1084 PetscErrorCode ierr; 1085 1086 PetscFunctionBegin; 1087 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1088 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1089 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1090 ierr = (*PetscHelpPrintf)(comm," -mat_ignore_ltriangular: Ignore lower triangular values set by user\n");CHKERRQ(ierr); 1091 PetscFunctionReturn(0); 1092 } 1093 1094 EXTERN_C_BEGIN 1095 #undef __FUNCT__ 1096 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1097 PetscErrorCode MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices) 1098 { 1099 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1100 PetscInt i,nz,n; 1101 1102 PetscFunctionBegin; 1103 nz = baij->maxnz; 1104 n = mat->n; 1105 for (i=0; i<nz; i++) { 1106 baij->j[i] = indices[i]; 1107 } 1108 baij->nz = nz; 1109 for (i=0; i<n; i++) { 1110 baij->ilen[i] = baij->imax[i]; 1111 } 1112 PetscFunctionReturn(0); 1113 } 1114 EXTERN_C_END 1115 1116 #undef __FUNCT__ 1117 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1118 /*@ 1119 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1120 in the matrix. 1121 1122 Input Parameters: 1123 + mat - the SeqSBAIJ matrix 1124 - indices - the column indices 1125 1126 Level: advanced 1127 1128 Notes: 1129 This can be called if you have precomputed the nonzero structure of the 1130 matrix and want to provide it to the matrix object to improve the performance 1131 of the MatSetValues() operation. 1132 1133 You MUST have set the correct numbers of nonzeros per row in the call to 1134 MatCreateSeqSBAIJ(), and the columns indices MUST be sorted. 1135 1136 MUST be called before any calls to MatSetValues() 1137 1138 .seealso: MatCreateSeqSBAIJ 1139 @*/ 1140 PetscErrorCode MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1141 { 1142 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1143 1144 PetscFunctionBegin; 1145 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1146 PetscValidPointer(indices,2); 1147 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1148 if (f) { 1149 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1150 } else { 1151 SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices"); 1152 } 1153 PetscFunctionReturn(0); 1154 } 1155 1156 #undef __FUNCT__ 1157 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1158 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A) 1159 { 1160 PetscErrorCode ierr; 1161 1162 PetscFunctionBegin; 1163 ierr = MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1164 PetscFunctionReturn(0); 1165 } 1166 1167 #undef __FUNCT__ 1168 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1169 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1170 { 1171 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1172 PetscFunctionBegin; 1173 *array = a->a; 1174 PetscFunctionReturn(0); 1175 } 1176 1177 #undef __FUNCT__ 1178 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1179 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1180 { 1181 PetscFunctionBegin; 1182 PetscFunctionReturn(0); 1183 } 1184 1185 #include "petscblaslapack.h" 1186 #undef __FUNCT__ 1187 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1188 PetscErrorCode MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1189 { 1190 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1191 PetscErrorCode ierr; 1192 PetscInt i,bs=Y->bs,bs2,j; 1193 PetscBLASInt bnz = (PetscBLASInt)x->nz,one = 1; 1194 1195 PetscFunctionBegin; 1196 if (str == SAME_NONZERO_PATTERN) { 1197 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1198 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1199 if (y->xtoy && y->XtoY != X) { 1200 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1201 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1202 } 1203 if (!y->xtoy) { /* get xtoy */ 1204 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1205 y->XtoY = X; 1206 } 1207 bs2 = bs*bs; 1208 for (i=0; i<x->nz; i++) { 1209 j = 0; 1210 while (j < bs2){ 1211 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1212 j++; 1213 } 1214 } 1215 PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)); 1216 } else { 1217 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1218 } 1219 PetscFunctionReturn(0); 1220 } 1221 1222 #undef __FUNCT__ 1223 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1224 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1225 { 1226 PetscFunctionBegin; 1227 *flg = PETSC_TRUE; 1228 PetscFunctionReturn(0); 1229 } 1230 1231 #undef __FUNCT__ 1232 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1233 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1234 { 1235 PetscFunctionBegin; 1236 *flg = PETSC_TRUE; 1237 PetscFunctionReturn(0); 1238 } 1239 1240 #undef __FUNCT__ 1241 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1242 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg) 1243 { 1244 PetscFunctionBegin; 1245 *flg = PETSC_FALSE; 1246 PetscFunctionReturn(0); 1247 } 1248 1249 /* -------------------------------------------------------------------*/ 1250 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1251 MatGetRow_SeqSBAIJ, 1252 MatRestoreRow_SeqSBAIJ, 1253 MatMult_SeqSBAIJ_N, 1254 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1255 MatMult_SeqSBAIJ_N, 1256 MatMultAdd_SeqSBAIJ_N, 1257 MatSolve_SeqSBAIJ_N, 1258 0, 1259 0, 1260 /*10*/ 0, 1261 0, 1262 MatCholeskyFactor_SeqSBAIJ, 1263 MatRelax_SeqSBAIJ, 1264 MatTranspose_SeqSBAIJ, 1265 /*15*/ MatGetInfo_SeqSBAIJ, 1266 MatEqual_SeqSBAIJ, 1267 MatGetDiagonal_SeqSBAIJ, 1268 MatDiagonalScale_SeqSBAIJ, 1269 MatNorm_SeqSBAIJ, 1270 /*20*/ 0, 1271 MatAssemblyEnd_SeqSBAIJ, 1272 0, 1273 MatSetOption_SeqSBAIJ, 1274 MatZeroEntries_SeqSBAIJ, 1275 /*25*/ 0, 1276 0, 1277 0, 1278 MatCholeskyFactorSymbolic_SeqSBAIJ, 1279 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1280 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1281 0, 1282 MatICCFactorSymbolic_SeqSBAIJ, 1283 MatGetArray_SeqSBAIJ, 1284 MatRestoreArray_SeqSBAIJ, 1285 /*35*/ MatDuplicate_SeqSBAIJ, 1286 0, 1287 0, 1288 0, 1289 0, 1290 /*40*/ MatAXPY_SeqSBAIJ, 1291 MatGetSubMatrices_SeqSBAIJ, 1292 MatIncreaseOverlap_SeqSBAIJ, 1293 MatGetValues_SeqSBAIJ, 1294 0, 1295 /*45*/ MatPrintHelp_SeqSBAIJ, 1296 MatScale_SeqSBAIJ, 1297 0, 1298 0, 1299 0, 1300 /*50*/ 0, 1301 MatGetRowIJ_SeqSBAIJ, 1302 MatRestoreRowIJ_SeqSBAIJ, 1303 0, 1304 0, 1305 /*55*/ 0, 1306 0, 1307 0, 1308 0, 1309 MatSetValuesBlocked_SeqSBAIJ, 1310 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1311 0, 1312 0, 1313 MatGetPetscMaps_Petsc, 1314 0, 1315 /*65*/ 0, 1316 0, 1317 0, 1318 0, 1319 0, 1320 /*70*/ MatGetRowMax_SeqSBAIJ, 1321 0, 1322 0, 1323 0, 1324 0, 1325 /*75*/ 0, 1326 0, 1327 0, 1328 0, 1329 0, 1330 /*80*/ 0, 1331 0, 1332 0, 1333 #if !defined(PETSC_USE_COMPLEX) 1334 MatGetInertia_SeqSBAIJ, 1335 #else 1336 0, 1337 #endif 1338 MatLoad_SeqSBAIJ, 1339 /*85*/ MatIsSymmetric_SeqSBAIJ, 1340 MatIsHermitian_SeqSBAIJ, 1341 MatIsStructurallySymmetric_SeqSBAIJ, 1342 0, 1343 0, 1344 /*90*/ 0, 1345 0, 1346 0, 1347 0, 1348 0, 1349 /*95*/ 0, 1350 0, 1351 0, 1352 0}; 1353 1354 EXTERN_C_BEGIN 1355 #undef __FUNCT__ 1356 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1357 PetscErrorCode MatStoreValues_SeqSBAIJ(Mat mat) 1358 { 1359 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1360 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 1361 PetscErrorCode ierr; 1362 1363 PetscFunctionBegin; 1364 if (aij->nonew != 1) { 1365 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1366 } 1367 1368 /* allocate space for values if not already there */ 1369 if (!aij->saved_values) { 1370 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1371 } 1372 1373 /* copy values over */ 1374 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1375 PetscFunctionReturn(0); 1376 } 1377 EXTERN_C_END 1378 1379 EXTERN_C_BEGIN 1380 #undef __FUNCT__ 1381 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1382 PetscErrorCode MatRetrieveValues_SeqSBAIJ(Mat mat) 1383 { 1384 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1385 PetscErrorCode ierr; 1386 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 1387 1388 PetscFunctionBegin; 1389 if (aij->nonew != 1) { 1390 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1391 } 1392 if (!aij->saved_values) { 1393 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1394 } 1395 1396 /* copy values over */ 1397 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1398 PetscFunctionReturn(0); 1399 } 1400 EXTERN_C_END 1401 1402 EXTERN_C_BEGIN 1403 #undef __FUNCT__ 1404 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1405 PetscErrorCode MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 1406 { 1407 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1408 PetscErrorCode ierr; 1409 PetscInt i,len,mbs,bs2; 1410 PetscTruth flg; 1411 1412 PetscFunctionBegin; 1413 B->preallocated = PETSC_TRUE; 1414 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1415 mbs = B->m/bs; 1416 bs2 = bs*bs; 1417 1418 if (mbs*bs != B->m) { 1419 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1420 } 1421 1422 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1423 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 1424 if (nnz) { 1425 for (i=0; i<mbs; i++) { 1426 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 1427 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs); 1428 } 1429 } 1430 1431 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1432 if (!flg) { 1433 switch (bs) { 1434 case 1: 1435 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1436 B->ops->solve = MatSolve_SeqSBAIJ_1; 1437 B->ops->solves = MatSolves_SeqSBAIJ_1; 1438 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1439 B->ops->mult = MatMult_SeqSBAIJ_1; 1440 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1441 break; 1442 case 2: 1443 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1444 B->ops->solve = MatSolve_SeqSBAIJ_2; 1445 B->ops->solvetranspose = MatSolve_SeqSBAIJ_2; 1446 B->ops->mult = MatMult_SeqSBAIJ_2; 1447 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1448 break; 1449 case 3: 1450 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1451 B->ops->solve = MatSolve_SeqSBAIJ_3; 1452 B->ops->solvetranspose = MatSolve_SeqSBAIJ_3; 1453 B->ops->mult = MatMult_SeqSBAIJ_3; 1454 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1455 break; 1456 case 4: 1457 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1458 B->ops->solve = MatSolve_SeqSBAIJ_4; 1459 B->ops->solvetranspose = MatSolve_SeqSBAIJ_4; 1460 B->ops->mult = MatMult_SeqSBAIJ_4; 1461 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1462 break; 1463 case 5: 1464 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1465 B->ops->solve = MatSolve_SeqSBAIJ_5; 1466 B->ops->solvetranspose = MatSolve_SeqSBAIJ_5; 1467 B->ops->mult = MatMult_SeqSBAIJ_5; 1468 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1469 break; 1470 case 6: 1471 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1472 B->ops->solve = MatSolve_SeqSBAIJ_6; 1473 B->ops->solvetranspose = MatSolve_SeqSBAIJ_6; 1474 B->ops->mult = MatMult_SeqSBAIJ_6; 1475 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1476 break; 1477 case 7: 1478 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1479 B->ops->solve = MatSolve_SeqSBAIJ_7; 1480 B->ops->solvetranspose = MatSolve_SeqSBAIJ_7; 1481 B->ops->mult = MatMult_SeqSBAIJ_7; 1482 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1483 break; 1484 } 1485 } 1486 1487 b->mbs = mbs; 1488 b->nbs = mbs; 1489 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->imax);CHKERRQ(ierr); 1490 if (!nnz) { 1491 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1492 else if (nz <= 0) nz = 1; 1493 for (i=0; i<mbs; i++) { 1494 b->imax[i] = nz; 1495 } 1496 nz = nz*mbs; /* total nz */ 1497 } else { 1498 nz = 0; 1499 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1500 } 1501 /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1502 1503 /* allocate the matrix space */ 1504 len = nz*sizeof(PetscInt) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(PetscInt); 1505 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1506 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1507 b->j = (PetscInt*)(b->a + nz*bs2); 1508 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1509 b->i = b->j + nz; 1510 b->singlemalloc = PETSC_TRUE; 1511 1512 /* pointer to beginning of each row */ 1513 b->i[0] = 0; 1514 for (i=1; i<mbs+1; i++) { 1515 b->i[i] = b->i[i-1] + b->imax[i-1]; 1516 } 1517 1518 /* b->ilen will count nonzeros in each block row so far. */ 1519 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->ilen);CHKERRQ(ierr); 1520 ierr = PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1521 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1522 1523 B->bs = bs; 1524 b->bs2 = bs2; 1525 b->nz = 0; 1526 b->maxnz = nz*bs2; 1527 1528 b->inew = 0; 1529 b->jnew = 0; 1530 b->anew = 0; 1531 b->a2anew = 0; 1532 b->permute = PETSC_FALSE; 1533 PetscFunctionReturn(0); 1534 } 1535 EXTERN_C_END 1536 1537 EXTERN_C_BEGIN 1538 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*); 1539 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqBAIJ(Mat,const MatType,MatReuse,Mat*); 1540 EXTERN_C_END 1541 1542 /*MC 1543 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1544 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1545 1546 Options Database Keys: 1547 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1548 1549 Level: beginner 1550 1551 .seealso: MatCreateSeqSBAIJ 1552 M*/ 1553 1554 EXTERN_C_BEGIN 1555 #undef __FUNCT__ 1556 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1557 PetscErrorCode MatCreate_SeqSBAIJ(Mat B) 1558 { 1559 Mat_SeqSBAIJ *b; 1560 PetscErrorCode ierr; 1561 PetscMPIInt size; 1562 PetscTruth flg; 1563 1564 PetscFunctionBegin; 1565 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1566 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1567 B->m = B->M = PetscMax(B->m,B->M); 1568 B->n = B->N = PetscMax(B->n,B->N); 1569 1570 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1571 B->data = (void*)b; 1572 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1573 B->ops->destroy = MatDestroy_SeqSBAIJ; 1574 B->ops->view = MatView_SeqSBAIJ; 1575 B->factor = 0; 1576 B->lupivotthreshold = 1.0; 1577 B->mapping = 0; 1578 b->row = 0; 1579 b->icol = 0; 1580 b->reallocs = 0; 1581 b->saved_values = 0; 1582 1583 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1584 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1585 1586 b->sorted = PETSC_FALSE; 1587 b->roworiented = PETSC_TRUE; 1588 b->nonew = 0; 1589 b->diag = 0; 1590 b->solve_work = 0; 1591 b->mult_work = 0; 1592 B->spptr = 0; 1593 b->keepzeroedrows = PETSC_FALSE; 1594 b->xtoy = 0; 1595 b->XtoY = 0; 1596 1597 b->inew = 0; 1598 b->jnew = 0; 1599 b->anew = 0; 1600 b->a2anew = 0; 1601 b->permute = PETSC_FALSE; 1602 1603 b->ignore_ltriangular = PETSC_FALSE; 1604 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_ignore_lower_triangular",&flg);CHKERRQ(ierr); 1605 if (flg) b->ignore_ltriangular = PETSC_TRUE; 1606 1607 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1608 "MatStoreValues_SeqSBAIJ", 1609 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1610 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1611 "MatRetrieveValues_SeqSBAIJ", 1612 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1613 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1614 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1615 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1616 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C", 1617 "MatConvert_SeqSBAIJ_SeqAIJ", 1618 MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr); 1619 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C", 1620 "MatConvert_SeqSBAIJ_SeqBAIJ", 1621 MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr); 1622 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1623 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1624 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1625 1626 B->symmetric = PETSC_TRUE; 1627 B->structurally_symmetric = PETSC_TRUE; 1628 B->symmetric_set = PETSC_TRUE; 1629 B->structurally_symmetric_set = PETSC_TRUE; 1630 PetscFunctionReturn(0); 1631 } 1632 EXTERN_C_END 1633 1634 #undef __FUNCT__ 1635 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1636 /*@C 1637 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1638 compressed row) format. For good matrix assembly performance the 1639 user should preallocate the matrix storage by setting the parameter nz 1640 (or the array nnz). By setting these parameters accurately, performance 1641 during matrix assembly can be increased by more than a factor of 50. 1642 1643 Collective on Mat 1644 1645 Input Parameters: 1646 + A - the symmetric matrix 1647 . bs - size of block 1648 . nz - number of block nonzeros per block row (same for all rows) 1649 - nnz - array containing the number of block nonzeros in the upper triangular plus 1650 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1651 1652 Options Database Keys: 1653 . -mat_no_unroll - uses code that does not unroll the loops in the 1654 block calculations (much slower) 1655 . -mat_block_size - size of the blocks to use 1656 1657 Level: intermediate 1658 1659 Notes: 1660 Specify the preallocated storage with either nz or nnz (not both). 1661 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1662 allocation. For additional details, see the users manual chapter on 1663 matrices. 1664 1665 If the nnz parameter is given then the nz parameter is ignored 1666 1667 1668 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1669 @*/ 1670 PetscErrorCode MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 1671 { 1672 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 1673 1674 PetscFunctionBegin; 1675 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1676 if (f) { 1677 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1678 } 1679 PetscFunctionReturn(0); 1680 } 1681 1682 #undef __FUNCT__ 1683 #define __FUNCT__ "MatCreateSeqSBAIJ" 1684 /*@C 1685 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1686 compressed row) format. For good matrix assembly performance the 1687 user should preallocate the matrix storage by setting the parameter nz 1688 (or the array nnz). By setting these parameters accurately, performance 1689 during matrix assembly can be increased by more than a factor of 50. 1690 1691 Collective on MPI_Comm 1692 1693 Input Parameters: 1694 + comm - MPI communicator, set to PETSC_COMM_SELF 1695 . bs - size of block 1696 . m - number of rows, or number of columns 1697 . nz - number of block nonzeros per block row (same for all rows) 1698 - nnz - array containing the number of block nonzeros in the upper triangular plus 1699 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1700 1701 Output Parameter: 1702 . A - the symmetric matrix 1703 1704 Options Database Keys: 1705 . -mat_no_unroll - uses code that does not unroll the loops in the 1706 block calculations (much slower) 1707 . -mat_block_size - size of the blocks to use 1708 1709 Level: intermediate 1710 1711 Notes: 1712 1713 Specify the preallocated storage with either nz or nnz (not both). 1714 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1715 allocation. For additional details, see the users manual chapter on 1716 matrices. 1717 1718 If the nnz parameter is given then the nz parameter is ignored 1719 1720 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1721 @*/ 1722 PetscErrorCode MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1723 { 1724 PetscErrorCode ierr; 1725 1726 PetscFunctionBegin; 1727 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1728 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1729 ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1730 PetscFunctionReturn(0); 1731 } 1732 1733 #undef __FUNCT__ 1734 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1735 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1736 { 1737 Mat C; 1738 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1739 PetscErrorCode ierr; 1740 PetscInt i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1741 1742 PetscFunctionBegin; 1743 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1744 1745 *B = 0; 1746 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1747 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1748 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1749 c = (Mat_SeqSBAIJ*)C->data; 1750 1751 C->preallocated = PETSC_TRUE; 1752 C->factor = A->factor; 1753 c->row = 0; 1754 c->icol = 0; 1755 c->saved_values = 0; 1756 c->keepzeroedrows = a->keepzeroedrows; 1757 C->assembled = PETSC_TRUE; 1758 1759 C->M = A->M; 1760 C->N = A->N; 1761 C->bs = A->bs; 1762 c->bs2 = a->bs2; 1763 c->mbs = a->mbs; 1764 c->nbs = a->nbs; 1765 1766 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr); 1767 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr); 1768 for (i=0; i<mbs; i++) { 1769 c->imax[i] = a->imax[i]; 1770 c->ilen[i] = a->ilen[i]; 1771 } 1772 1773 /* allocate the matrix space */ 1774 c->singlemalloc = PETSC_TRUE; 1775 len = (mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)); 1776 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1777 c->j = (PetscInt*)(c->a + nz*bs2); 1778 c->i = c->j + nz; 1779 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1780 if (mbs > 0) { 1781 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1782 if (cpvalues == MAT_COPY_VALUES) { 1783 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1784 } else { 1785 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1786 } 1787 } 1788 1789 ierr = PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1790 c->sorted = a->sorted; 1791 c->roworiented = a->roworiented; 1792 c->nonew = a->nonew; 1793 1794 if (a->diag) { 1795 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 1796 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1797 for (i=0; i<mbs; i++) { 1798 c->diag[i] = a->diag[i]; 1799 } 1800 } else c->diag = 0; 1801 c->nz = a->nz; 1802 c->maxnz = a->maxnz; 1803 c->solve_work = 0; 1804 c->mult_work = 0; 1805 *B = C; 1806 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1807 PetscFunctionReturn(0); 1808 } 1809 1810 #undef __FUNCT__ 1811 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1812 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer,const MatType type,Mat *A) 1813 { 1814 Mat_SeqSBAIJ *a; 1815 Mat B; 1816 PetscErrorCode ierr; 1817 int fd; 1818 PetscMPIInt size; 1819 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 1820 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1821 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 1822 PetscInt *masked,nmask,tmp,bs2,ishift; 1823 PetscScalar *aa; 1824 MPI_Comm comm = ((PetscObject)viewer)->comm; 1825 1826 PetscFunctionBegin; 1827 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1828 bs2 = bs*bs; 1829 1830 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1831 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1832 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1833 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1834 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1835 M = header[1]; N = header[2]; nz = header[3]; 1836 1837 if (header[3] < 0) { 1838 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1839 } 1840 1841 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1842 1843 /* 1844 This code adds extra rows to make sure the number of rows is 1845 divisible by the blocksize 1846 */ 1847 mbs = M/bs; 1848 extra_rows = bs - M + bs*(mbs); 1849 if (extra_rows == bs) extra_rows = 0; 1850 else mbs++; 1851 if (extra_rows) { 1852 PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"); 1853 } 1854 1855 /* read in row lengths */ 1856 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1857 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1858 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1859 1860 /* read in column indices */ 1861 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1862 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1863 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1864 1865 /* loop over row lengths determining block row lengths */ 1866 ierr = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr); 1867 ierr = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1868 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 1869 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1870 masked = mask + mbs; 1871 rowcount = 0; nzcount = 0; 1872 for (i=0; i<mbs; i++) { 1873 nmask = 0; 1874 for (j=0; j<bs; j++) { 1875 kmax = rowlengths[rowcount]; 1876 for (k=0; k<kmax; k++) { 1877 tmp = jj[nzcount++]/bs; /* block col. index */ 1878 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1879 } 1880 rowcount++; 1881 } 1882 s_browlengths[i] += nmask; 1883 1884 /* zero out the mask elements we set */ 1885 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1886 } 1887 1888 /* create our matrix */ 1889 ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr); 1890 ierr = MatSetType(B,type);CHKERRQ(ierr); 1891 ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr); 1892 a = (Mat_SeqSBAIJ*)B->data; 1893 1894 /* set matrix "i" values */ 1895 a->i[0] = 0; 1896 for (i=1; i<= mbs; i++) { 1897 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1898 a->ilen[i-1] = s_browlengths[i-1]; 1899 } 1900 a->nz = a->i[mbs]; 1901 1902 /* read in nonzero values */ 1903 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1904 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1905 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1906 1907 /* set "a" and "j" values into matrix */ 1908 nzcount = 0; jcount = 0; 1909 for (i=0; i<mbs; i++) { 1910 nzcountb = nzcount; 1911 nmask = 0; 1912 for (j=0; j<bs; j++) { 1913 kmax = rowlengths[i*bs+j]; 1914 for (k=0; k<kmax; k++) { 1915 tmp = jj[nzcount++]/bs; /* block col. index */ 1916 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1917 } 1918 } 1919 /* sort the masked values */ 1920 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1921 1922 /* set "j" values into matrix */ 1923 maskcount = 1; 1924 for (j=0; j<nmask; j++) { 1925 a->j[jcount++] = masked[j]; 1926 mask[masked[j]] = maskcount++; 1927 } 1928 1929 /* set "a" values into matrix */ 1930 ishift = bs2*a->i[i]; 1931 for (j=0; j<bs; j++) { 1932 kmax = rowlengths[i*bs+j]; 1933 for (k=0; k<kmax; k++) { 1934 tmp = jj[nzcountb]/bs ; /* block col. index */ 1935 if (tmp >= i){ 1936 block = mask[tmp] - 1; 1937 point = jj[nzcountb] - bs*tmp; 1938 idx = ishift + bs2*block + j + bs*point; 1939 a->a[idx] = aa[nzcountb]; 1940 } 1941 nzcountb++; 1942 } 1943 } 1944 /* zero out the mask elements we set */ 1945 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1946 } 1947 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1948 1949 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1950 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1951 ierr = PetscFree(aa);CHKERRQ(ierr); 1952 ierr = PetscFree(jj);CHKERRQ(ierr); 1953 ierr = PetscFree(mask);CHKERRQ(ierr); 1954 1955 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1956 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1957 ierr = MatView_Private(B);CHKERRQ(ierr); 1958 *A = B; 1959 PetscFunctionReturn(0); 1960 } 1961 1962 #undef __FUNCT__ 1963 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1964 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1965 { 1966 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1967 MatScalar *aa=a->a,*v,*v1; 1968 PetscScalar *x,*b,*t,sum,d; 1969 PetscErrorCode ierr; 1970 PetscInt m=a->mbs,bs=A->bs,*ai=a->i,*aj=a->j; 1971 PetscInt nz,nz1,*vj,*vj1,i; 1972 1973 PetscFunctionBegin; 1974 its = its*lits; 1975 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1976 1977 if (bs > 1) 1978 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1979 1980 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1981 if (xx != bb) { 1982 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1983 } else { 1984 b = x; 1985 } 1986 1987 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1988 1989 if (flag & SOR_ZERO_INITIAL_GUESS) { 1990 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1991 for (i=0; i<m; i++) 1992 t[i] = b[i]; 1993 1994 for (i=0; i<m; i++){ 1995 d = *(aa + ai[i]); /* diag[i] */ 1996 v = aa + ai[i] + 1; 1997 vj = aj + ai[i] + 1; 1998 nz = ai[i+1] - ai[i] - 1; 1999 x[i] = omega*t[i]/d; 2000 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 2001 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2002 } 2003 } 2004 2005 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2006 for (i=0; i<m; i++) 2007 t[i] = b[i]; 2008 2009 for (i=0; i<m-1; i++){ /* update rhs */ 2010 v = aa + ai[i] + 1; 2011 vj = aj + ai[i] + 1; 2012 nz = ai[i+1] - ai[i] - 1; 2013 while (nz--) t[*vj++] -= x[i]*(*v++); 2014 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2015 } 2016 for (i=m-1; i>=0; i--){ 2017 d = *(aa + ai[i]); 2018 v = aa + ai[i] + 1; 2019 vj = aj + ai[i] + 1; 2020 nz = ai[i+1] - ai[i] - 1; 2021 sum = t[i]; 2022 while (nz--) sum -= x[*vj++]*(*v++); 2023 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2024 x[i] = (1-omega)*x[i] + omega*sum/d; 2025 } 2026 } 2027 its--; 2028 } 2029 2030 while (its--) { 2031 /* 2032 forward sweep: 2033 for i=0,...,m-1: 2034 sum[i] = (b[i] - U(i,:)x )/d[i]; 2035 x[i] = (1-omega)x[i] + omega*sum[i]; 2036 b = b - x[i]*U^T(i,:); 2037 2038 */ 2039 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2040 for (i=0; i<m; i++) 2041 t[i] = b[i]; 2042 2043 for (i=0; i<m; i++){ 2044 d = *(aa + ai[i]); /* diag[i] */ 2045 v = aa + ai[i] + 1; v1=v; 2046 vj = aj + ai[i] + 1; vj1=vj; 2047 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2048 sum = t[i]; 2049 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2050 x[i] = (1-omega)*x[i] + omega*sum/d; 2051 while (nz--) t[*vj++] -= x[i]*(*v++); 2052 ierr = PetscLogFlops(4*nz-2);CHKERRQ(ierr); 2053 } 2054 } 2055 2056 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2057 /* 2058 backward sweep: 2059 b = b - x[i]*U^T(i,:), i=0,...,n-2 2060 for i=m-1,...,0: 2061 sum[i] = (b[i] - U(i,:)x )/d[i]; 2062 x[i] = (1-omega)x[i] + omega*sum[i]; 2063 */ 2064 for (i=0; i<m; i++) 2065 t[i] = b[i]; 2066 2067 for (i=0; i<m-1; i++){ /* update rhs */ 2068 v = aa + ai[i] + 1; 2069 vj = aj + ai[i] + 1; 2070 nz = ai[i+1] - ai[i] - 1; 2071 while (nz--) t[*vj++] -= x[i]*(*v++); 2072 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2073 } 2074 for (i=m-1; i>=0; i--){ 2075 d = *(aa + ai[i]); 2076 v = aa + ai[i] + 1; 2077 vj = aj + ai[i] + 1; 2078 nz = ai[i+1] - ai[i] - 1; 2079 sum = t[i]; 2080 while (nz--) sum -= x[*vj++]*(*v++); 2081 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2082 x[i] = (1-omega)*x[i] + omega*sum/d; 2083 } 2084 } 2085 } 2086 2087 ierr = PetscFree(t);CHKERRQ(ierr); 2088 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2089 if (bb != xx) { 2090 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2091 } 2092 PetscFunctionReturn(0); 2093 } 2094 2095 2096 2097 2098 2099 2100