1 2 /* 3 Defines the basic matrix operations for the SBAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 7 #include "src/inline/spops.h" 8 #include "src/mat/impls/sbaij/seq/sbaij.h" 9 10 #define CHUNKSIZE 10 11 12 /* 13 Checks for missing diagonals 14 */ 15 #undef __FUNCT__ 16 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 17 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A) 18 { 19 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 20 PetscErrorCode ierr; 21 int *diag,*jj = a->j,i; 22 23 PetscFunctionBegin; 24 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 25 diag = a->diag; 26 for (i=0; i<a->mbs; i++) { 27 if (jj[diag[i]] != i) SETERRQ1(PETSC_ERR_ARG_CORRUPT,"Matrix is missing diagonal number %D",i); 28 } 29 PetscFunctionReturn(0); 30 } 31 32 #undef __FUNCT__ 33 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 34 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A) 35 { 36 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 37 PetscErrorCode ierr; 38 int i,mbs = a->mbs; 39 40 PetscFunctionBegin; 41 if (a->diag) PetscFunctionReturn(0); 42 43 ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr); 44 PetscLogObjectMemory(A,(mbs+1)*sizeof(int)); 45 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 46 PetscFunctionReturn(0); 47 } 48 49 #undef __FUNCT__ 50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 51 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 52 { 53 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 54 int n = a->mbs,i; 55 56 PetscFunctionBegin; 57 *nn = n; 58 if (!ia) PetscFunctionReturn(0); 59 60 if (oshift == 1) { 61 /* temporarily add 1 to i and j indices */ 62 int nz = a->i[n]; 63 for (i=0; i<nz; i++) a->j[i]++; 64 for (i=0; i<n+1; i++) a->i[i]++; 65 *ia = a->i; *ja = a->j; 66 } else { 67 *ia = a->i; *ja = a->j; 68 } 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 74 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 75 { 76 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 77 int i,n = a->mbs; 78 79 PetscFunctionBegin; 80 if (!ia) PetscFunctionReturn(0); 81 82 if (oshift == 1) { 83 int nz = a->i[n]-1; 84 for (i=0; i<nz; i++) a->j[i]--; 85 for (i=0; i<n+1; i++) a->i[i]--; 86 } 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 92 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A) 93 { 94 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 95 PetscErrorCode ierr; 96 97 PetscFunctionBegin; 98 PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->m,a->nz); 99 ierr = PetscFree(a->a);CHKERRQ(ierr); 100 if (!a->singlemalloc) { 101 ierr = PetscFree(a->i);CHKERRQ(ierr); 102 ierr = PetscFree(a->j);CHKERRQ(ierr); 103 } 104 if (a->row) { 105 ierr = ISDestroy(a->row);CHKERRQ(ierr); 106 } 107 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 108 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 109 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 110 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 111 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 112 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 113 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 114 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 115 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 116 117 if (a->inew){ 118 ierr = PetscFree(a->inew);CHKERRQ(ierr); 119 a->inew = 0; 120 } 121 ierr = PetscFree(a);CHKERRQ(ierr); 122 123 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 124 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 125 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 126 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 127 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr); 128 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 129 PetscFunctionReturn(0); 130 } 131 132 #undef __FUNCT__ 133 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 134 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op) 135 { 136 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 137 138 PetscFunctionBegin; 139 switch (op) { 140 case MAT_ROW_ORIENTED: 141 a->roworiented = PETSC_TRUE; 142 break; 143 case MAT_COLUMN_ORIENTED: 144 a->roworiented = PETSC_FALSE; 145 break; 146 case MAT_COLUMNS_SORTED: 147 a->sorted = PETSC_TRUE; 148 break; 149 case MAT_COLUMNS_UNSORTED: 150 a->sorted = PETSC_FALSE; 151 break; 152 case MAT_KEEP_ZEROED_ROWS: 153 a->keepzeroedrows = PETSC_TRUE; 154 break; 155 case MAT_NO_NEW_NONZERO_LOCATIONS: 156 a->nonew = 1; 157 break; 158 case MAT_NEW_NONZERO_LOCATION_ERR: 159 a->nonew = -1; 160 break; 161 case MAT_NEW_NONZERO_ALLOCATION_ERR: 162 a->nonew = -2; 163 break; 164 case MAT_YES_NEW_NONZERO_LOCATIONS: 165 a->nonew = 0; 166 break; 167 case MAT_ROWS_SORTED: 168 case MAT_ROWS_UNSORTED: 169 case MAT_YES_NEW_DIAGONALS: 170 case MAT_IGNORE_OFF_PROC_ENTRIES: 171 case MAT_USE_HASH_TABLE: 172 PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n"); 173 break; 174 case MAT_NO_NEW_DIAGONALS: 175 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 176 case MAT_NOT_SYMMETRIC: 177 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 178 case MAT_HERMITIAN: 179 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 180 case MAT_SYMMETRIC: 181 case MAT_STRUCTURALLY_SYMMETRIC: 182 case MAT_NOT_HERMITIAN: 183 case MAT_SYMMETRY_ETERNAL: 184 case MAT_NOT_SYMMETRY_ETERNAL: 185 break; 186 default: 187 SETERRQ(PETSC_ERR_SUP,"unknown option"); 188 } 189 PetscFunctionReturn(0); 190 } 191 192 #undef __FUNCT__ 193 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 194 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v) 195 { 196 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 197 PetscErrorCode ierr; 198 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2; 199 MatScalar *aa,*aa_i; 200 PetscScalar *v_i; 201 202 PetscFunctionBegin; 203 bs = A->bs; 204 ai = a->i; 205 aj = a->j; 206 aa = a->a; 207 bs2 = a->bs2; 208 209 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row); 210 211 bn = row/bs; /* Block number */ 212 bp = row % bs; /* Block position */ 213 M = ai[bn+1] - ai[bn]; 214 *ncols = bs*M; 215 216 if (v) { 217 *v = 0; 218 if (*ncols) { 219 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 220 for (i=0; i<M; i++) { /* for each block in the block row */ 221 v_i = *v + i*bs; 222 aa_i = aa + bs2*(ai[bn] + i); 223 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 224 } 225 } 226 } 227 228 if (cols) { 229 *cols = 0; 230 if (*ncols) { 231 ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr); 232 for (i=0; i<M; i++) { /* for each block in the block row */ 233 cols_i = *cols + i*bs; 234 itmp = bs*aj[ai[bn] + i]; 235 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 236 } 237 } 238 } 239 240 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 241 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 242 #ifdef column_search 243 v_i = *v + M*bs; 244 cols_i = *cols + M*bs; 245 for (i=0; i<bn; i++){ /* for each block row */ 246 M = ai[i+1] - ai[i]; 247 for (j=0; j<M; j++){ 248 itmp = aj[ai[i] + j]; /* block column value */ 249 if (itmp == bn){ 250 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 251 for (k=0; k<bs; k++) { 252 *cols_i++ = i*bs+k; 253 *v_i++ = aa_i[k]; 254 } 255 *ncols += bs; 256 break; 257 } 258 } 259 } 260 #endif 261 262 PetscFunctionReturn(0); 263 } 264 265 #undef __FUNCT__ 266 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 267 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 268 { 269 PetscErrorCode ierr; 270 271 PetscFunctionBegin; 272 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 273 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 274 PetscFunctionReturn(0); 275 } 276 277 #undef __FUNCT__ 278 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 279 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B) 280 { 281 PetscErrorCode ierr; 282 PetscFunctionBegin; 283 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 284 PetscFunctionReturn(0); 285 } 286 287 #undef __FUNCT__ 288 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 289 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 290 { 291 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 292 PetscErrorCode ierr; 293 PetscInt i,j,bs = A->bs,k,l,bs2=a->bs2; 294 char *name; 295 PetscViewerFormat format; 296 297 PetscFunctionBegin; 298 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 299 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 300 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 301 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 302 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 303 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 304 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 305 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 306 for (i=0; i<a->mbs; i++) { 307 for (j=0; j<bs; j++) { 308 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 309 for (k=a->i[i]; k<a->i[i+1]; k++) { 310 for (l=0; l<bs; l++) { 311 #if defined(PETSC_USE_COMPLEX) 312 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 313 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 314 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 315 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 316 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 317 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 318 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 319 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 320 } 321 #else 322 if (a->a[bs2*k + l*bs + j] != 0.0) { 323 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 324 } 325 #endif 326 } 327 } 328 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 329 } 330 } 331 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 332 } else { 333 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 334 for (i=0; i<a->mbs; i++) { 335 for (j=0; j<bs; j++) { 336 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 337 for (k=a->i[i]; k<a->i[i+1]; k++) { 338 for (l=0; l<bs; l++) { 339 #if defined(PETSC_USE_COMPLEX) 340 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 341 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 342 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 343 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 344 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 345 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 346 } else { 347 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 348 } 349 #else 350 ierr = PetscViewerASCIIPrintf(viewer," %D %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 351 #endif 352 } 353 } 354 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 355 } 356 } 357 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 358 } 359 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 360 PetscFunctionReturn(0); 361 } 362 363 #undef __FUNCT__ 364 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 365 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 366 { 367 Mat A = (Mat) Aa; 368 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 369 PetscErrorCode ierr; 370 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2,rank; 371 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 372 MatScalar *aa; 373 MPI_Comm comm; 374 PetscViewer viewer; 375 376 PetscFunctionBegin; 377 /* 378 This is nasty. If this is called from an originally parallel matrix 379 then all processes call this,but only the first has the matrix so the 380 rest should return immediately. 381 */ 382 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 383 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 384 if (rank) PetscFunctionReturn(0); 385 386 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 387 388 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 389 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 390 391 /* loop over matrix elements drawing boxes */ 392 color = PETSC_DRAW_BLUE; 393 for (i=0,row=0; i<mbs; i++,row+=bs) { 394 for (j=a->i[i]; j<a->i[i+1]; j++) { 395 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 396 x_l = a->j[j]*bs; x_r = x_l + 1.0; 397 aa = a->a + j*bs2; 398 for (k=0; k<bs; k++) { 399 for (l=0; l<bs; l++) { 400 if (PetscRealPart(*aa++) >= 0.) continue; 401 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 402 } 403 } 404 } 405 } 406 color = PETSC_DRAW_CYAN; 407 for (i=0,row=0; i<mbs; i++,row+=bs) { 408 for (j=a->i[i]; j<a->i[i+1]; j++) { 409 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 410 x_l = a->j[j]*bs; x_r = x_l + 1.0; 411 aa = a->a + j*bs2; 412 for (k=0; k<bs; k++) { 413 for (l=0; l<bs; l++) { 414 if (PetscRealPart(*aa++) != 0.) continue; 415 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 416 } 417 } 418 } 419 } 420 421 color = PETSC_DRAW_RED; 422 for (i=0,row=0; i<mbs; i++,row+=bs) { 423 for (j=a->i[i]; j<a->i[i+1]; j++) { 424 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 425 x_l = a->j[j]*bs; x_r = x_l + 1.0; 426 aa = a->a + j*bs2; 427 for (k=0; k<bs; k++) { 428 for (l=0; l<bs; l++) { 429 if (PetscRealPart(*aa++) <= 0.) continue; 430 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 431 } 432 } 433 } 434 } 435 PetscFunctionReturn(0); 436 } 437 438 #undef __FUNCT__ 439 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 440 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 441 { 442 PetscErrorCode ierr; 443 PetscReal xl,yl,xr,yr,w,h; 444 PetscDraw draw; 445 PetscTruth isnull; 446 447 PetscFunctionBegin; 448 449 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 450 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 451 452 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 453 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 454 xr += w; yr += h; xl = -w; yl = -h; 455 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 456 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 457 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 458 PetscFunctionReturn(0); 459 } 460 461 #undef __FUNCT__ 462 #define __FUNCT__ "MatView_SeqSBAIJ" 463 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 464 { 465 PetscErrorCode ierr; 466 PetscTruth iascii,isdraw; 467 468 PetscFunctionBegin; 469 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 470 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 471 if (iascii){ 472 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 473 } else if (isdraw) { 474 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 475 } else { 476 Mat B; 477 ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr); 478 ierr = MatView(B,viewer);CHKERRQ(ierr); 479 ierr = MatDestroy(B);CHKERRQ(ierr); 480 } 481 PetscFunctionReturn(0); 482 } 483 484 485 #undef __FUNCT__ 486 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 487 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[]) 488 { 489 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 490 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 491 int *ai = a->i,*ailen = a->ilen; 492 int brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2; 493 MatScalar *ap,*aa = a->a,zero = 0.0; 494 495 PetscFunctionBegin; 496 for (k=0; k<m; k++) { /* loop over rows */ 497 row = im[k]; brow = row/bs; 498 if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row); 499 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 500 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 501 nrow = ailen[brow]; 502 for (l=0; l<n; l++) { /* loop over columns */ 503 if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]); 504 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1); 505 col = in[l] ; 506 bcol = col/bs; 507 cidx = col%bs; 508 ridx = row%bs; 509 high = nrow; 510 low = 0; /* assume unsorted */ 511 while (high-low > 5) { 512 t = (low+high)/2; 513 if (rp[t] > bcol) high = t; 514 else low = t; 515 } 516 for (i=low; i<high; i++) { 517 if (rp[i] > bcol) break; 518 if (rp[i] == bcol) { 519 *v++ = ap[bs2*i+bs*cidx+ridx]; 520 goto finished; 521 } 522 } 523 *v++ = zero; 524 finished:; 525 } 526 } 527 PetscFunctionReturn(0); 528 } 529 530 531 #undef __FUNCT__ 532 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 533 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 534 { 535 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 536 PetscErrorCode ierr; 537 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 538 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 539 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval; 540 PetscTruth roworiented=a->roworiented; 541 const MatScalar *value = v; 542 MatScalar *ap,*aa = a->a,*bap; 543 544 PetscFunctionBegin; 545 if (roworiented) { 546 stepval = (n-1)*bs; 547 } else { 548 stepval = (m-1)*bs; 549 } 550 for (k=0; k<m; k++) { /* loop over added rows */ 551 row = im[k]; 552 if (row < 0) continue; 553 #if defined(PETSC_USE_BOPT_g) 554 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 555 #endif 556 rp = aj + ai[row]; 557 ap = aa + bs2*ai[row]; 558 rmax = imax[row]; 559 nrow = ailen[row]; 560 low = 0; 561 for (l=0; l<n; l++) { /* loop over added columns */ 562 if (in[l] < 0) continue; 563 col = in[l]; 564 #if defined(PETSC_USE_BOPT_g) 565 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1); 566 #endif 567 if (col < row) continue; /* ignore lower triangular block */ 568 if (roworiented) { 569 value = v + k*(stepval+bs)*bs + l*bs; 570 } else { 571 value = v + l*(stepval+bs)*bs + k*bs; 572 } 573 if (!sorted) low = 0; high = nrow; 574 while (high-low > 7) { 575 t = (low+high)/2; 576 if (rp[t] > col) high = t; 577 else low = t; 578 } 579 for (i=low; i<high; i++) { 580 if (rp[i] > col) break; 581 if (rp[i] == col) { 582 bap = ap + bs2*i; 583 if (roworiented) { 584 if (is == ADD_VALUES) { 585 for (ii=0; ii<bs; ii++,value+=stepval) { 586 for (jj=ii; jj<bs2; jj+=bs) { 587 bap[jj] += *value++; 588 } 589 } 590 } else { 591 for (ii=0; ii<bs; ii++,value+=stepval) { 592 for (jj=ii; jj<bs2; jj+=bs) { 593 bap[jj] = *value++; 594 } 595 } 596 } 597 } else { 598 if (is == ADD_VALUES) { 599 for (ii=0; ii<bs; ii++,value+=stepval) { 600 for (jj=0; jj<bs; jj++) { 601 *bap++ += *value++; 602 } 603 } 604 } else { 605 for (ii=0; ii<bs; ii++,value+=stepval) { 606 for (jj=0; jj<bs; jj++) { 607 *bap++ = *value++; 608 } 609 } 610 } 611 } 612 goto noinsert2; 613 } 614 } 615 if (nonew == 1) goto noinsert2; 616 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 617 if (nrow >= rmax) { 618 /* there is no extra room in row, therefore enlarge */ 619 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 620 MatScalar *new_a; 621 622 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 623 624 /* malloc new storage space */ 625 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 626 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 627 new_j = (int*)(new_a + bs2*new_nz); 628 new_i = new_j + new_nz; 629 630 /* copy over old data into new slots */ 631 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 632 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 633 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 634 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 635 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 636 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 637 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 638 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 639 /* free up old matrix storage */ 640 ierr = PetscFree(a->a);CHKERRQ(ierr); 641 if (!a->singlemalloc) { 642 ierr = PetscFree(a->i);CHKERRQ(ierr); 643 ierr = PetscFree(a->j);CHKERRQ(ierr); 644 } 645 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 646 a->singlemalloc = PETSC_TRUE; 647 648 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 649 rmax = imax[row] = imax[row] + CHUNKSIZE; 650 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 651 a->maxnz += bs2*CHUNKSIZE; 652 a->reallocs++; 653 a->nz++; 654 } 655 N = nrow++ - 1; 656 /* shift up all the later entries in this row */ 657 for (ii=N; ii>=i; ii--) { 658 rp[ii+1] = rp[ii]; 659 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 660 } 661 if (N >= i) { 662 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 663 } 664 rp[i] = col; 665 bap = ap + bs2*i; 666 if (roworiented) { 667 for (ii=0; ii<bs; ii++,value+=stepval) { 668 for (jj=ii; jj<bs2; jj+=bs) { 669 bap[jj] = *value++; 670 } 671 } 672 } else { 673 for (ii=0; ii<bs; ii++,value+=stepval) { 674 for (jj=0; jj<bs; jj++) { 675 *bap++ = *value++; 676 } 677 } 678 } 679 noinsert2:; 680 low = i; 681 } 682 ailen[row] = nrow; 683 } 684 PetscFunctionReturn(0); 685 } 686 687 #undef __FUNCT__ 688 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 689 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 690 { 691 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 692 PetscErrorCode ierr; 693 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 694 int m = A->m,*ip,N,*ailen = a->ilen; 695 int mbs = a->mbs,bs2 = a->bs2,rmax = 0; 696 MatScalar *aa = a->a,*ap; 697 698 PetscFunctionBegin; 699 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 700 701 if (m) rmax = ailen[0]; 702 for (i=1; i<mbs; i++) { 703 /* move each row back by the amount of empty slots (fshift) before it*/ 704 fshift += imax[i-1] - ailen[i-1]; 705 rmax = PetscMax(rmax,ailen[i]); 706 if (fshift) { 707 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 708 N = ailen[i]; 709 for (j=0; j<N; j++) { 710 ip[j-fshift] = ip[j]; 711 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 712 } 713 } 714 ai[i] = ai[i-1] + ailen[i-1]; 715 } 716 if (mbs) { 717 fshift += imax[mbs-1] - ailen[mbs-1]; 718 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 719 } 720 /* reset ilen and imax for each row */ 721 for (i=0; i<mbs; i++) { 722 ailen[i] = imax[i] = ai[i+1] - ai[i]; 723 } 724 a->nz = ai[mbs]; 725 726 /* diagonals may have moved, reset it */ 727 if (a->diag) { 728 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr); 729 } 730 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n", 731 m,A->m,A->bs,fshift*bs2,a->nz*bs2); 732 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %D\n", 733 a->reallocs); 734 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %D\n",rmax); 735 a->reallocs = 0; 736 A->info.nz_unneeded = (PetscReal)fshift*bs2; 737 738 PetscFunctionReturn(0); 739 } 740 741 /* 742 This function returns an array of flags which indicate the locations of contiguous 743 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 744 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 745 Assume: sizes should be long enough to hold all the values. 746 */ 747 #undef __FUNCT__ 748 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 749 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 750 { 751 int i,j,k,row; 752 PetscTruth flg; 753 754 PetscFunctionBegin; 755 for (i=0,j=0; i<n; j++) { 756 row = idx[i]; 757 if (row%bs!=0) { /* Not the begining of a block */ 758 sizes[j] = 1; 759 i++; 760 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 761 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 762 i++; 763 } else { /* Begining of the block, so check if the complete block exists */ 764 flg = PETSC_TRUE; 765 for (k=1; k<bs; k++) { 766 if (row+k != idx[i+k]) { /* break in the block */ 767 flg = PETSC_FALSE; 768 break; 769 } 770 } 771 if (flg == PETSC_TRUE) { /* No break in the bs */ 772 sizes[j] = bs; 773 i+= bs; 774 } else { 775 sizes[j] = 1; 776 i++; 777 } 778 } 779 } 780 *bs_max = j; 781 PetscFunctionReturn(0); 782 } 783 784 #undef __FUNCT__ 785 #define __FUNCT__ "MatZeroRows_SeqSBAIJ" 786 PetscErrorCode MatZeroRows_SeqSBAIJ(Mat A,IS is,const PetscScalar *diag) 787 { 788 PetscFunctionBegin; 789 SETERRQ(PETSC_ERR_SUP,"No support for this function yet"); 790 } 791 792 /* Only add/insert a(i,j) with i<=j (blocks). 793 Any a(i,j) with i>j input by user is ingored. 794 */ 795 796 #undef __FUNCT__ 797 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 798 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 799 { 800 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 801 PetscErrorCode ierr; 802 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 803 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 804 int *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol; 805 int ridx,cidx,bs2=a->bs2; 806 MatScalar *ap,value,*aa=a->a,*bap; 807 808 PetscFunctionBegin; 809 810 for (k=0; k<m; k++) { /* loop over added rows */ 811 row = im[k]; /* row number */ 812 brow = row/bs; /* block row number */ 813 if (row < 0) continue; 814 #if defined(PETSC_USE_BOPT_g) 815 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 816 #endif 817 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 818 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 819 rmax = imax[brow]; /* maximum space allocated for this row */ 820 nrow = ailen[brow]; /* actual length of this row */ 821 low = 0; 822 823 for (l=0; l<n; l++) { /* loop over added columns */ 824 if (in[l] < 0) continue; 825 #if defined(PETSC_USE_BOPT_g) 826 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->m-1); 827 #endif 828 col = in[l]; 829 bcol = col/bs; /* block col number */ 830 831 if (brow <= bcol){ 832 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 833 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 834 /* element value a(k,l) */ 835 if (roworiented) { 836 value = v[l + k*n]; 837 } else { 838 value = v[k + l*m]; 839 } 840 841 /* move pointer bap to a(k,l) quickly and add/insert value */ 842 if (!sorted) low = 0; high = nrow; 843 while (high-low > 7) { 844 t = (low+high)/2; 845 if (rp[t] > bcol) high = t; 846 else low = t; 847 } 848 for (i=low; i<high; i++) { 849 /* printf("The loop of i=low.., rp[%D]=%D\n",i,rp[i]); */ 850 if (rp[i] > bcol) break; 851 if (rp[i] == bcol) { 852 bap = ap + bs2*i + bs*cidx + ridx; 853 if (is == ADD_VALUES) *bap += value; 854 else *bap = value; 855 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 856 if (brow == bcol && ridx < cidx){ 857 bap = ap + bs2*i + bs*ridx + cidx; 858 if (is == ADD_VALUES) *bap += value; 859 else *bap = value; 860 } 861 goto noinsert1; 862 } 863 } 864 865 if (nonew == 1) goto noinsert1; 866 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 867 if (nrow >= rmax) { 868 /* there is no extra room in row, therefore enlarge */ 869 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 870 MatScalar *new_a; 871 872 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 873 874 /* Malloc new storage space */ 875 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 876 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 877 new_j = (int*)(new_a + bs2*new_nz); 878 new_i = new_j + new_nz; 879 880 /* copy over old data into new slots */ 881 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 882 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 883 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 884 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 885 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 886 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 887 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 888 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 889 /* free up old matrix storage */ 890 ierr = PetscFree(a->a);CHKERRQ(ierr); 891 if (!a->singlemalloc) { 892 ierr = PetscFree(a->i);CHKERRQ(ierr); 893 ierr = PetscFree(a->j);CHKERRQ(ierr); 894 } 895 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 896 a->singlemalloc = PETSC_TRUE; 897 898 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 899 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 900 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 901 a->maxnz += bs2*CHUNKSIZE; 902 a->reallocs++; 903 a->nz++; 904 } 905 906 N = nrow++ - 1; 907 /* shift up all the later entries in this row */ 908 for (ii=N; ii>=i; ii--) { 909 rp[ii+1] = rp[ii]; 910 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 911 } 912 if (N>=i) { 913 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 914 } 915 rp[i] = bcol; 916 ap[bs2*i + bs*cidx + ridx] = value; 917 noinsert1:; 918 low = i; 919 /* } */ 920 } 921 } /* end of if .. if.. */ 922 } /* end of loop over added columns */ 923 ailen[brow] = nrow; 924 } /* end of loop over added rows */ 925 926 PetscFunctionReturn(0); 927 } 928 929 EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*); 930 EXTERN PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*); 931 EXTERN PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS[],int); 932 EXTERN PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 933 EXTERN PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat*[]); 934 EXTERN PetscErrorCode MatScale_SeqSBAIJ(const PetscScalar*,Mat); 935 EXTERN PetscErrorCode MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 936 EXTERN PetscErrorCode MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 937 EXTERN PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat,Vec); 938 EXTERN PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 939 EXTERN PetscErrorCode MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 940 EXTERN PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat); 941 EXTERN PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat,Vec); 942 943 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 944 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 945 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 946 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 947 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 948 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 949 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 950 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 951 952 EXTERN PetscErrorCode MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs); 953 954 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 955 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 956 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 957 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 958 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 959 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 960 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 961 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 962 963 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*); 964 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*); 965 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*); 966 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*); 967 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*); 968 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*); 969 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*); 970 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*); 971 EXTERN PetscErrorCode MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*); 972 973 EXTERN PetscErrorCode MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 974 EXTERN PetscErrorCode MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 975 EXTERN PetscErrorCode MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 976 EXTERN PetscErrorCode MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 977 EXTERN PetscErrorCode MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 978 EXTERN PetscErrorCode MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 979 EXTERN PetscErrorCode MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 980 EXTERN PetscErrorCode MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 981 982 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 983 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 984 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 985 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 986 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 987 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 988 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 989 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 990 991 #ifdef HAVE_MatICCFactor 992 /* modified from MatILUFactor_SeqSBAIJ, needs further work! */ 993 #undef __FUNCT__ 994 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 995 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 996 { 997 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 998 Mat outA; 999 PetscErrorCode ierr; 1000 PetscTruth row_identity,col_identity; 1001 1002 PetscFunctionBegin; 1003 outA = inA; 1004 inA->factor = FACTOR_CHOLESKY; 1005 1006 if (!a->diag) { 1007 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 1008 } 1009 /* 1010 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1011 for ILU(0) factorization with natural ordering 1012 */ 1013 switch (a->bs) { 1014 case 1: 1015 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1016 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1017 inA->ops->solves = MatSolves_SeqSBAIJ_1; 1018 PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1019 case 2: 1020 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1021 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1022 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1023 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1024 break; 1025 case 3: 1026 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1027 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1028 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1029 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1030 break; 1031 case 4: 1032 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1033 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1034 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1035 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1036 break; 1037 case 5: 1038 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1039 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1040 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1041 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1042 break; 1043 case 6: 1044 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1045 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1046 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1047 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1048 break; 1049 case 7: 1050 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1051 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1052 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1053 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1054 break; 1055 default: 1056 a->row = row; 1057 a->icol = col; 1058 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1059 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1060 1061 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1062 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1063 PetscLogObjectParent(inA,a->icol); 1064 1065 if (!a->solve_work) { 1066 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1067 PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar)); 1068 } 1069 } 1070 1071 ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr); 1072 1073 PetscFunctionReturn(0); 1074 } 1075 #endif 1076 1077 #undef __FUNCT__ 1078 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1079 PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat A) 1080 { 1081 static PetscTruth called = PETSC_FALSE; 1082 MPI_Comm comm = A->comm; 1083 PetscErrorCode ierr; 1084 1085 PetscFunctionBegin; 1086 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1087 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1088 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1089 PetscFunctionReturn(0); 1090 } 1091 1092 EXTERN_C_BEGIN 1093 #undef __FUNCT__ 1094 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1095 PetscErrorCode MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices) 1096 { 1097 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1098 int i,nz,n; 1099 1100 PetscFunctionBegin; 1101 nz = baij->maxnz; 1102 n = mat->n; 1103 for (i=0; i<nz; i++) { 1104 baij->j[i] = indices[i]; 1105 } 1106 baij->nz = nz; 1107 for (i=0; i<n; i++) { 1108 baij->ilen[i] = baij->imax[i]; 1109 } 1110 1111 PetscFunctionReturn(0); 1112 } 1113 EXTERN_C_END 1114 1115 #undef __FUNCT__ 1116 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1117 /*@ 1118 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1119 in the matrix. 1120 1121 Input Parameters: 1122 + mat - the SeqSBAIJ matrix 1123 - indices - the column indices 1124 1125 Level: advanced 1126 1127 Notes: 1128 This can be called if you have precomputed the nonzero structure of the 1129 matrix and want to provide it to the matrix object to improve the performance 1130 of the MatSetValues() operation. 1131 1132 You MUST have set the correct numbers of nonzeros per row in the call to 1133 MatCreateSeqSBAIJ(). 1134 1135 MUST be called before any calls to MatSetValues() 1136 1137 .seealso: MatCreateSeqSBAIJ 1138 @*/ 1139 PetscErrorCode MatSeqSBAIJSetColumnIndices(Mat mat,int *indices) 1140 { 1141 PetscErrorCode ierr,(*f)(Mat,int *); 1142 1143 PetscFunctionBegin; 1144 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1145 PetscValidPointer(indices,2); 1146 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1147 if (f) { 1148 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1149 } else { 1150 SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices"); 1151 } 1152 PetscFunctionReturn(0); 1153 } 1154 1155 #undef __FUNCT__ 1156 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1157 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A) 1158 { 1159 PetscErrorCode ierr; 1160 1161 PetscFunctionBegin; 1162 ierr = MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1163 PetscFunctionReturn(0); 1164 } 1165 1166 #undef __FUNCT__ 1167 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1168 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1169 { 1170 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1171 PetscFunctionBegin; 1172 *array = a->a; 1173 PetscFunctionReturn(0); 1174 } 1175 1176 #undef __FUNCT__ 1177 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1178 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1179 { 1180 PetscFunctionBegin; 1181 PetscFunctionReturn(0); 1182 } 1183 1184 #include "petscblaslapack.h" 1185 #undef __FUNCT__ 1186 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1187 PetscErrorCode MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1188 { 1189 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1190 PetscErrorCode ierr; 1191 PetscInt i,bs=Y->bs,bs2,j; 1192 PetscBLASInt bnz = (PetscBLASInt)x->nz,one = 1; 1193 1194 PetscFunctionBegin; 1195 if (str == SAME_NONZERO_PATTERN) { 1196 BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1197 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1198 if (y->xtoy && y->XtoY != X) { 1199 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1200 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1201 } 1202 if (!y->xtoy) { /* get xtoy */ 1203 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1204 y->XtoY = X; 1205 } 1206 bs2 = bs*bs; 1207 for (i=0; i<x->nz; i++) { 1208 j = 0; 1209 while (j < bs2){ 1210 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1211 j++; 1212 } 1213 } 1214 PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)); 1215 } else { 1216 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1217 } 1218 PetscFunctionReturn(0); 1219 } 1220 1221 #undef __FUNCT__ 1222 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1223 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1224 { 1225 PetscFunctionBegin; 1226 *flg = PETSC_TRUE; 1227 PetscFunctionReturn(0); 1228 } 1229 1230 #undef __FUNCT__ 1231 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1232 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1233 { 1234 PetscFunctionBegin; 1235 *flg = PETSC_TRUE; 1236 PetscFunctionReturn(0); 1237 } 1238 1239 #undef __FUNCT__ 1240 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1241 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg) 1242 { 1243 PetscFunctionBegin; 1244 *flg = PETSC_FALSE; 1245 PetscFunctionReturn(0); 1246 } 1247 1248 /* -------------------------------------------------------------------*/ 1249 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1250 MatGetRow_SeqSBAIJ, 1251 MatRestoreRow_SeqSBAIJ, 1252 MatMult_SeqSBAIJ_N, 1253 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1254 MatMult_SeqSBAIJ_N, 1255 MatMultAdd_SeqSBAIJ_N, 1256 MatSolve_SeqSBAIJ_N, 1257 0, 1258 0, 1259 /*10*/ 0, 1260 0, 1261 MatCholeskyFactor_SeqSBAIJ, 1262 MatRelax_SeqSBAIJ, 1263 MatTranspose_SeqSBAIJ, 1264 /*15*/ MatGetInfo_SeqSBAIJ, 1265 MatEqual_SeqSBAIJ, 1266 MatGetDiagonal_SeqSBAIJ, 1267 MatDiagonalScale_SeqSBAIJ, 1268 MatNorm_SeqSBAIJ, 1269 /*20*/ 0, 1270 MatAssemblyEnd_SeqSBAIJ, 1271 0, 1272 MatSetOption_SeqSBAIJ, 1273 MatZeroEntries_SeqSBAIJ, 1274 /*25*/ MatZeroRows_SeqSBAIJ, 1275 0, 1276 0, 1277 MatCholeskyFactorSymbolic_SeqSBAIJ, 1278 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1279 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1280 0, 1281 MatICCFactorSymbolic_SeqSBAIJ, 1282 MatGetArray_SeqSBAIJ, 1283 MatRestoreArray_SeqSBAIJ, 1284 /*35*/ MatDuplicate_SeqSBAIJ, 1285 0, 1286 0, 1287 0, 1288 0, 1289 /*40*/ MatAXPY_SeqSBAIJ, 1290 MatGetSubMatrices_SeqSBAIJ, 1291 MatIncreaseOverlap_SeqSBAIJ, 1292 MatGetValues_SeqSBAIJ, 1293 0, 1294 /*45*/ MatPrintHelp_SeqSBAIJ, 1295 MatScale_SeqSBAIJ, 1296 0, 1297 0, 1298 0, 1299 /*50*/ 0, 1300 MatGetRowIJ_SeqSBAIJ, 1301 MatRestoreRowIJ_SeqSBAIJ, 1302 0, 1303 0, 1304 /*55*/ 0, 1305 0, 1306 0, 1307 0, 1308 MatSetValuesBlocked_SeqSBAIJ, 1309 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1310 0, 1311 0, 1312 MatGetPetscMaps_Petsc, 1313 0, 1314 /*65*/ 0, 1315 0, 1316 0, 1317 0, 1318 0, 1319 /*70*/ MatGetRowMax_SeqSBAIJ, 1320 0, 1321 0, 1322 0, 1323 0, 1324 /*75*/ 0, 1325 0, 1326 0, 1327 0, 1328 0, 1329 /*80*/ 0, 1330 0, 1331 0, 1332 #if !defined(PETSC_USE_COMPLEX) 1333 MatGetInertia_SeqSBAIJ, 1334 #else 1335 0, 1336 #endif 1337 MatLoad_SeqSBAIJ, 1338 /*85*/ MatIsSymmetric_SeqSBAIJ, 1339 MatIsHermitian_SeqSBAIJ, 1340 MatIsStructurallySymmetric_SeqSBAIJ, 1341 0, 1342 0, 1343 /*90*/ 0, 1344 0, 1345 0, 1346 0, 1347 0, 1348 /*95*/ 0, 1349 0, 1350 0, 1351 0}; 1352 1353 EXTERN_C_BEGIN 1354 #undef __FUNCT__ 1355 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1356 PetscErrorCode MatStoreValues_SeqSBAIJ(Mat mat) 1357 { 1358 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1359 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 1360 PetscErrorCode ierr; 1361 1362 PetscFunctionBegin; 1363 if (aij->nonew != 1) { 1364 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1365 } 1366 1367 /* allocate space for values if not already there */ 1368 if (!aij->saved_values) { 1369 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1370 } 1371 1372 /* copy values over */ 1373 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1374 PetscFunctionReturn(0); 1375 } 1376 EXTERN_C_END 1377 1378 EXTERN_C_BEGIN 1379 #undef __FUNCT__ 1380 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1381 PetscErrorCode MatRetrieveValues_SeqSBAIJ(Mat mat) 1382 { 1383 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1384 PetscErrorCode ierr; 1385 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 1386 1387 PetscFunctionBegin; 1388 if (aij->nonew != 1) { 1389 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1390 } 1391 if (!aij->saved_values) { 1392 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1393 } 1394 1395 /* copy values over */ 1396 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1397 PetscFunctionReturn(0); 1398 } 1399 EXTERN_C_END 1400 1401 EXTERN_C_BEGIN 1402 #undef __FUNCT__ 1403 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1404 PetscErrorCode MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,int bs,int nz,int *nnz) 1405 { 1406 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1407 PetscErrorCode ierr; 1408 int i,len,mbs,bs2; 1409 PetscTruth flg; 1410 1411 PetscFunctionBegin; 1412 B->preallocated = PETSC_TRUE; 1413 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1414 mbs = B->m/bs; 1415 bs2 = bs*bs; 1416 1417 if (mbs*bs != B->m) { 1418 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1419 } 1420 1421 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1422 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 1423 if (nnz) { 1424 for (i=0; i<mbs; i++) { 1425 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 1426 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs); 1427 } 1428 } 1429 1430 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1431 if (!flg) { 1432 switch (bs) { 1433 case 1: 1434 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1435 B->ops->solve = MatSolve_SeqSBAIJ_1; 1436 B->ops->solves = MatSolves_SeqSBAIJ_1; 1437 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1438 B->ops->mult = MatMult_SeqSBAIJ_1; 1439 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1440 break; 1441 case 2: 1442 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1443 B->ops->solve = MatSolve_SeqSBAIJ_2; 1444 B->ops->solvetranspose = MatSolve_SeqSBAIJ_2; 1445 B->ops->mult = MatMult_SeqSBAIJ_2; 1446 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1447 break; 1448 case 3: 1449 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1450 B->ops->solve = MatSolve_SeqSBAIJ_3; 1451 B->ops->solvetranspose = MatSolve_SeqSBAIJ_3; 1452 B->ops->mult = MatMult_SeqSBAIJ_3; 1453 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1454 break; 1455 case 4: 1456 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1457 B->ops->solve = MatSolve_SeqSBAIJ_4; 1458 B->ops->solvetranspose = MatSolve_SeqSBAIJ_4; 1459 B->ops->mult = MatMult_SeqSBAIJ_4; 1460 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1461 break; 1462 case 5: 1463 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1464 B->ops->solve = MatSolve_SeqSBAIJ_5; 1465 B->ops->solvetranspose = MatSolve_SeqSBAIJ_5; 1466 B->ops->mult = MatMult_SeqSBAIJ_5; 1467 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1468 break; 1469 case 6: 1470 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1471 B->ops->solve = MatSolve_SeqSBAIJ_6; 1472 B->ops->solvetranspose = MatSolve_SeqSBAIJ_6; 1473 B->ops->mult = MatMult_SeqSBAIJ_6; 1474 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1475 break; 1476 case 7: 1477 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1478 B->ops->solve = MatSolve_SeqSBAIJ_7; 1479 B->ops->solvetranspose = MatSolve_SeqSBAIJ_7; 1480 B->ops->mult = MatMult_SeqSBAIJ_7; 1481 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1482 break; 1483 } 1484 } 1485 1486 b->mbs = mbs; 1487 b->nbs = mbs; 1488 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1489 if (!nnz) { 1490 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1491 else if (nz <= 0) nz = 1; 1492 for (i=0; i<mbs; i++) { 1493 b->imax[i] = nz; 1494 } 1495 nz = nz*mbs; /* total nz */ 1496 } else { 1497 nz = 0; 1498 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1499 } 1500 /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1501 1502 /* allocate the matrix space */ 1503 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1504 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1505 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1506 b->j = (int*)(b->a + nz*bs2); 1507 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 1508 b->i = b->j + nz; 1509 b->singlemalloc = PETSC_TRUE; 1510 1511 /* pointer to beginning of each row */ 1512 b->i[0] = 0; 1513 for (i=1; i<mbs+1; i++) { 1514 b->i[i] = b->i[i-1] + b->imax[i-1]; 1515 } 1516 1517 /* b->ilen will count nonzeros in each block row so far. */ 1518 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1519 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1520 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1521 1522 B->bs = bs; 1523 b->bs2 = bs2; 1524 b->nz = 0; 1525 b->maxnz = nz*bs2; 1526 1527 b->inew = 0; 1528 b->jnew = 0; 1529 b->anew = 0; 1530 b->a2anew = 0; 1531 b->permute = PETSC_FALSE; 1532 PetscFunctionReturn(0); 1533 } 1534 EXTERN_C_END 1535 1536 EXTERN_C_BEGIN 1537 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqAIJ(Mat,const MatType,Mat*); 1538 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqBAIJ(Mat,const MatType,Mat*); 1539 EXTERN_C_END 1540 1541 /*MC 1542 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1543 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1544 1545 Options Database Keys: 1546 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1547 1548 Level: beginner 1549 1550 .seealso: MatCreateSeqSBAIJ 1551 M*/ 1552 1553 EXTERN_C_BEGIN 1554 #undef __FUNCT__ 1555 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1556 PetscErrorCode MatCreate_SeqSBAIJ(Mat B) 1557 { 1558 Mat_SeqSBAIJ *b; 1559 PetscErrorCode ierr; 1560 int size; 1561 1562 PetscFunctionBegin; 1563 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1564 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1565 B->m = B->M = PetscMax(B->m,B->M); 1566 B->n = B->N = PetscMax(B->n,B->N); 1567 1568 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1569 B->data = (void*)b; 1570 ierr = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1571 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1572 B->ops->destroy = MatDestroy_SeqSBAIJ; 1573 B->ops->view = MatView_SeqSBAIJ; 1574 B->factor = 0; 1575 B->lupivotthreshold = 1.0; 1576 B->mapping = 0; 1577 b->row = 0; 1578 b->icol = 0; 1579 b->reallocs = 0; 1580 b->saved_values = 0; 1581 1582 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1583 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1584 1585 b->sorted = PETSC_FALSE; 1586 b->roworiented = PETSC_TRUE; 1587 b->nonew = 0; 1588 b->diag = 0; 1589 b->solve_work = 0; 1590 b->mult_work = 0; 1591 B->spptr = 0; 1592 b->keepzeroedrows = PETSC_FALSE; 1593 b->xtoy = 0; 1594 b->XtoY = 0; 1595 1596 b->inew = 0; 1597 b->jnew = 0; 1598 b->anew = 0; 1599 b->a2anew = 0; 1600 b->permute = PETSC_FALSE; 1601 1602 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1603 "MatStoreValues_SeqSBAIJ", 1604 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1605 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1606 "MatRetrieveValues_SeqSBAIJ", 1607 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1608 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1609 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1610 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1611 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C", 1612 "MatConvert_SeqSBAIJ_SeqAIJ", 1613 MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr); 1614 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C", 1615 "MatConvert_SeqSBAIJ_SeqBAIJ", 1616 MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr); 1617 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1618 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1619 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1620 1621 B->symmetric = PETSC_TRUE; 1622 B->structurally_symmetric = PETSC_TRUE; 1623 B->symmetric_set = PETSC_TRUE; 1624 B->structurally_symmetric_set = PETSC_TRUE; 1625 PetscFunctionReturn(0); 1626 } 1627 EXTERN_C_END 1628 1629 #undef __FUNCT__ 1630 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1631 /*@C 1632 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1633 compressed row) format. For good matrix assembly performance the 1634 user should preallocate the matrix storage by setting the parameter nz 1635 (or the array nnz). By setting these parameters accurately, performance 1636 during matrix assembly can be increased by more than a factor of 50. 1637 1638 Collective on Mat 1639 1640 Input Parameters: 1641 + A - the symmetric matrix 1642 . bs - size of block 1643 . nz - number of block nonzeros per block row (same for all rows) 1644 - nnz - array containing the number of block nonzeros in the upper triangular plus 1645 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1646 1647 Options Database Keys: 1648 . -mat_no_unroll - uses code that does not unroll the loops in the 1649 block calculations (much slower) 1650 . -mat_block_size - size of the blocks to use 1651 1652 Level: intermediate 1653 1654 Notes: 1655 Specify the preallocated storage with either nz or nnz (not both). 1656 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1657 allocation. For additional details, see the users manual chapter on 1658 matrices. 1659 1660 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1661 @*/ 1662 PetscErrorCode MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) { 1663 PetscErrorCode ierr,(*f)(Mat,int,int,const int[]); 1664 1665 PetscFunctionBegin; 1666 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1667 if (f) { 1668 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1669 } 1670 PetscFunctionReturn(0); 1671 } 1672 1673 #undef __FUNCT__ 1674 #define __FUNCT__ "MatCreateSeqSBAIJ" 1675 /*@C 1676 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1677 compressed row) format. For good matrix assembly performance the 1678 user should preallocate the matrix storage by setting the parameter nz 1679 (or the array nnz). By setting these parameters accurately, performance 1680 during matrix assembly can be increased by more than a factor of 50. 1681 1682 Collective on MPI_Comm 1683 1684 Input Parameters: 1685 + comm - MPI communicator, set to PETSC_COMM_SELF 1686 . bs - size of block 1687 . m - number of rows, or number of columns 1688 . nz - number of block nonzeros per block row (same for all rows) 1689 - nnz - array containing the number of block nonzeros in the upper triangular plus 1690 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1691 1692 Output Parameter: 1693 . A - the symmetric matrix 1694 1695 Options Database Keys: 1696 . -mat_no_unroll - uses code that does not unroll the loops in the 1697 block calculations (much slower) 1698 . -mat_block_size - size of the blocks to use 1699 1700 Level: intermediate 1701 1702 Notes: 1703 1704 Specify the preallocated storage with either nz or nnz (not both). 1705 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1706 allocation. For additional details, see the users manual chapter on 1707 matrices. 1708 1709 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1710 @*/ 1711 PetscErrorCode MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A) 1712 { 1713 PetscErrorCode ierr; 1714 1715 PetscFunctionBegin; 1716 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1717 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1718 ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1719 PetscFunctionReturn(0); 1720 } 1721 1722 #undef __FUNCT__ 1723 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1724 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1725 { 1726 Mat C; 1727 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1728 PetscErrorCode ierr; 1729 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1730 1731 PetscFunctionBegin; 1732 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1733 1734 *B = 0; 1735 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1736 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1737 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1738 c = (Mat_SeqSBAIJ*)C->data; 1739 1740 C->preallocated = PETSC_TRUE; 1741 C->factor = A->factor; 1742 c->row = 0; 1743 c->icol = 0; 1744 c->saved_values = 0; 1745 c->keepzeroedrows = a->keepzeroedrows; 1746 C->assembled = PETSC_TRUE; 1747 1748 C->M = A->M; 1749 C->N = A->N; 1750 C->bs = A->bs; 1751 c->bs2 = a->bs2; 1752 c->mbs = a->mbs; 1753 c->nbs = a->nbs; 1754 1755 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1756 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1757 for (i=0; i<mbs; i++) { 1758 c->imax[i] = a->imax[i]; 1759 c->ilen[i] = a->ilen[i]; 1760 } 1761 1762 /* allocate the matrix space */ 1763 c->singlemalloc = PETSC_TRUE; 1764 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1765 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1766 c->j = (int*)(c->a + nz*bs2); 1767 c->i = c->j + nz; 1768 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1769 if (mbs > 0) { 1770 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1771 if (cpvalues == MAT_COPY_VALUES) { 1772 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1773 } else { 1774 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1775 } 1776 } 1777 1778 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1779 c->sorted = a->sorted; 1780 c->roworiented = a->roworiented; 1781 c->nonew = a->nonew; 1782 1783 if (a->diag) { 1784 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1785 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1786 for (i=0; i<mbs; i++) { 1787 c->diag[i] = a->diag[i]; 1788 } 1789 } else c->diag = 0; 1790 c->nz = a->nz; 1791 c->maxnz = a->maxnz; 1792 c->solve_work = 0; 1793 c->mult_work = 0; 1794 *B = C; 1795 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1796 PetscFunctionReturn(0); 1797 } 1798 1799 #undef __FUNCT__ 1800 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1801 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer,const MatType type,Mat *A) 1802 { 1803 Mat_SeqSBAIJ *a; 1804 Mat B; 1805 PetscErrorCode ierr; 1806 int i,nz,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1807 int *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1808 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1809 int *masked,nmask,tmp,bs2,ishift; 1810 PetscScalar *aa; 1811 MPI_Comm comm = ((PetscObject)viewer)->comm; 1812 1813 PetscFunctionBegin; 1814 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1815 bs2 = bs*bs; 1816 1817 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1818 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1819 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1820 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1821 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1822 M = header[1]; N = header[2]; nz = header[3]; 1823 1824 if (header[3] < 0) { 1825 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1826 } 1827 1828 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1829 1830 /* 1831 This code adds extra rows to make sure the number of rows is 1832 divisible by the blocksize 1833 */ 1834 mbs = M/bs; 1835 extra_rows = bs - M + bs*(mbs); 1836 if (extra_rows == bs) extra_rows = 0; 1837 else mbs++; 1838 if (extra_rows) { 1839 PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"); 1840 } 1841 1842 /* read in row lengths */ 1843 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1844 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1845 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1846 1847 /* read in column indices */ 1848 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1849 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1850 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1851 1852 /* loop over row lengths determining block row lengths */ 1853 ierr = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr); 1854 ierr = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1855 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1856 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1857 masked = mask + mbs; 1858 rowcount = 0; nzcount = 0; 1859 for (i=0; i<mbs; i++) { 1860 nmask = 0; 1861 for (j=0; j<bs; j++) { 1862 kmax = rowlengths[rowcount]; 1863 for (k=0; k<kmax; k++) { 1864 tmp = jj[nzcount++]/bs; /* block col. index */ 1865 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1866 } 1867 rowcount++; 1868 } 1869 s_browlengths[i] += nmask; 1870 1871 /* zero out the mask elements we set */ 1872 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1873 } 1874 1875 /* create our matrix */ 1876 ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr); 1877 ierr = MatSetType(B,type);CHKERRQ(ierr); 1878 ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr); 1879 a = (Mat_SeqSBAIJ*)B->data; 1880 1881 /* set matrix "i" values */ 1882 a->i[0] = 0; 1883 for (i=1; i<= mbs; i++) { 1884 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1885 a->ilen[i-1] = s_browlengths[i-1]; 1886 } 1887 a->nz = a->i[mbs]; 1888 1889 /* read in nonzero values */ 1890 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1891 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1892 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1893 1894 /* set "a" and "j" values into matrix */ 1895 nzcount = 0; jcount = 0; 1896 for (i=0; i<mbs; i++) { 1897 nzcountb = nzcount; 1898 nmask = 0; 1899 for (j=0; j<bs; j++) { 1900 kmax = rowlengths[i*bs+j]; 1901 for (k=0; k<kmax; k++) { 1902 tmp = jj[nzcount++]/bs; /* block col. index */ 1903 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1904 } 1905 } 1906 /* sort the masked values */ 1907 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1908 1909 /* set "j" values into matrix */ 1910 maskcount = 1; 1911 for (j=0; j<nmask; j++) { 1912 a->j[jcount++] = masked[j]; 1913 mask[masked[j]] = maskcount++; 1914 } 1915 1916 /* set "a" values into matrix */ 1917 ishift = bs2*a->i[i]; 1918 for (j=0; j<bs; j++) { 1919 kmax = rowlengths[i*bs+j]; 1920 for (k=0; k<kmax; k++) { 1921 tmp = jj[nzcountb]/bs ; /* block col. index */ 1922 if (tmp >= i){ 1923 block = mask[tmp] - 1; 1924 point = jj[nzcountb] - bs*tmp; 1925 idx = ishift + bs2*block + j + bs*point; 1926 a->a[idx] = aa[nzcountb]; 1927 } 1928 nzcountb++; 1929 } 1930 } 1931 /* zero out the mask elements we set */ 1932 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1933 } 1934 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1935 1936 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1937 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1938 ierr = PetscFree(aa);CHKERRQ(ierr); 1939 ierr = PetscFree(jj);CHKERRQ(ierr); 1940 ierr = PetscFree(mask);CHKERRQ(ierr); 1941 1942 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1943 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1944 ierr = MatView_Private(B);CHKERRQ(ierr); 1945 *A = B; 1946 PetscFunctionReturn(0); 1947 } 1948 1949 #undef __FUNCT__ 1950 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1951 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1952 { 1953 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1954 MatScalar *aa=a->a,*v,*v1; 1955 PetscScalar *x,*b,*t,sum,d; 1956 PetscErrorCode ierr; 1957 PetscInt m=a->mbs,bs=A->bs,*ai=a->i,*aj=a->j; 1958 PetscInt nz,nz1,*vj,*vj1,i; 1959 1960 PetscFunctionBegin; 1961 its = its*lits; 1962 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1963 1964 if (bs > 1) 1965 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1966 1967 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1968 if (xx != bb) { 1969 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1970 } else { 1971 b = x; 1972 } 1973 1974 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1975 1976 if (flag & SOR_ZERO_INITIAL_GUESS) { 1977 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1978 for (i=0; i<m; i++) 1979 t[i] = b[i]; 1980 1981 for (i=0; i<m; i++){ 1982 d = *(aa + ai[i]); /* diag[i] */ 1983 v = aa + ai[i] + 1; 1984 vj = aj + ai[i] + 1; 1985 nz = ai[i+1] - ai[i] - 1; 1986 x[i] = omega*t[i]/d; 1987 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 1988 PetscLogFlops(2*nz-1); 1989 } 1990 } 1991 1992 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1993 for (i=0; i<m; i++) 1994 t[i] = b[i]; 1995 1996 for (i=0; i<m-1; i++){ /* update rhs */ 1997 v = aa + ai[i] + 1; 1998 vj = aj + ai[i] + 1; 1999 nz = ai[i+1] - ai[i] - 1; 2000 while (nz--) t[*vj++] -= x[i]*(*v++); 2001 PetscLogFlops(2*nz-1); 2002 } 2003 for (i=m-1; i>=0; i--){ 2004 d = *(aa + ai[i]); 2005 v = aa + ai[i] + 1; 2006 vj = aj + ai[i] + 1; 2007 nz = ai[i+1] - ai[i] - 1; 2008 sum = t[i]; 2009 while (nz--) sum -= x[*vj++]*(*v++); 2010 PetscLogFlops(2*nz-1); 2011 x[i] = (1-omega)*x[i] + omega*sum/d; 2012 } 2013 } 2014 its--; 2015 } 2016 2017 while (its--) { 2018 /* 2019 forward sweep: 2020 for i=0,...,m-1: 2021 sum[i] = (b[i] - U(i,:)x )/d[i]; 2022 x[i] = (1-omega)x[i] + omega*sum[i]; 2023 b = b - x[i]*U^T(i,:); 2024 2025 */ 2026 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2027 for (i=0; i<m; i++) 2028 t[i] = b[i]; 2029 2030 for (i=0; i<m; i++){ 2031 d = *(aa + ai[i]); /* diag[i] */ 2032 v = aa + ai[i] + 1; v1=v; 2033 vj = aj + ai[i] + 1; vj1=vj; 2034 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2035 sum = t[i]; 2036 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2037 x[i] = (1-omega)*x[i] + omega*sum/d; 2038 while (nz--) t[*vj++] -= x[i]*(*v++); 2039 PetscLogFlops(4*nz-2); 2040 } 2041 } 2042 2043 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2044 /* 2045 backward sweep: 2046 b = b - x[i]*U^T(i,:), i=0,...,n-2 2047 for i=m-1,...,0: 2048 sum[i] = (b[i] - U(i,:)x )/d[i]; 2049 x[i] = (1-omega)x[i] + omega*sum[i]; 2050 */ 2051 for (i=0; i<m; i++) 2052 t[i] = b[i]; 2053 2054 for (i=0; i<m-1; i++){ /* update rhs */ 2055 v = aa + ai[i] + 1; 2056 vj = aj + ai[i] + 1; 2057 nz = ai[i+1] - ai[i] - 1; 2058 while (nz--) t[*vj++] -= x[i]*(*v++); 2059 PetscLogFlops(2*nz-1); 2060 } 2061 for (i=m-1; i>=0; i--){ 2062 d = *(aa + ai[i]); 2063 v = aa + ai[i] + 1; 2064 vj = aj + ai[i] + 1; 2065 nz = ai[i+1] - ai[i] - 1; 2066 sum = t[i]; 2067 while (nz--) sum -= x[*vj++]*(*v++); 2068 PetscLogFlops(2*nz-1); 2069 x[i] = (1-omega)*x[i] + omega*sum/d; 2070 } 2071 } 2072 } 2073 2074 ierr = PetscFree(t);CHKERRQ(ierr); 2075 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2076 if (bb != xx) { 2077 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2078 } 2079 2080 PetscFunctionReturn(0); 2081 } 2082 2083 2084 2085 2086 2087 2088