1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the SBAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/inline/spops.h" 9 #include "src/mat/impls/sbaij/seq/sbaij.h" 10 11 #define CHUNKSIZE 10 12 13 /* 14 Checks for missing diagonals 15 */ 16 #undef __FUNCT__ 17 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 18 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A) 19 { 20 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 21 PetscErrorCode ierr; 22 PetscInt *diag,*jj = a->j,i; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 for (i=0; i<a->mbs; i++) { 28 if (jj[diag[i]] != i) SETERRQ1(PETSC_ERR_ARG_CORRUPT,"Matrix is missing diagonal number %D",i); 29 } 30 PetscFunctionReturn(0); 31 } 32 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 35 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A) 36 { 37 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 38 PetscErrorCode ierr; 39 PetscInt i,mbs = a->mbs; 40 41 PetscFunctionBegin; 42 if (a->diag) PetscFunctionReturn(0); 43 44 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 45 ierr = PetscLogObjectMemory(A,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 46 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 47 PetscFunctionReturn(0); 48 } 49 50 #undef __FUNCT__ 51 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 52 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 53 { 54 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 55 PetscInt n = a->mbs,i; 56 57 PetscFunctionBegin; 58 *nn = n; 59 if (!ia) PetscFunctionReturn(0); 60 61 if (oshift == 1) { 62 /* temporarily add 1 to i and j indices */ 63 PetscInt nz = a->i[n]; 64 for (i=0; i<nz; i++) a->j[i]++; 65 for (i=0; i<n+1; i++) a->i[i]++; 66 *ia = a->i; *ja = a->j; 67 } else { 68 *ia = a->i; *ja = a->j; 69 } 70 PetscFunctionReturn(0); 71 } 72 73 #undef __FUNCT__ 74 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 75 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 76 { 77 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 78 PetscInt i,n = a->mbs; 79 80 PetscFunctionBegin; 81 if (!ia) PetscFunctionReturn(0); 82 83 if (oshift == 1) { 84 PetscInt nz = a->i[n]-1; 85 for (i=0; i<nz; i++) a->j[i]--; 86 for (i=0; i<n+1; i++) a->i[i]--; 87 } 88 PetscFunctionReturn(0); 89 } 90 91 #undef __FUNCT__ 92 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 93 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A) 94 { 95 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 96 PetscErrorCode ierr; 97 98 PetscFunctionBegin; 99 PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->m,a->nz); 100 ierr = MatSeqXAIJFreeAIJ(a->singlemalloc,&a->a,&a->j,&a->i);CHKERRQ(ierr); 101 if (a->row) { 102 ierr = ISDestroy(a->row);CHKERRQ(ierr); 103 } 104 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 105 if (a->imax) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 106 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 107 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 108 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 109 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 110 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 111 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 112 113 if (a->inew){ 114 ierr = PetscFree(a->inew);CHKERRQ(ierr); 115 a->inew = 0; 116 } 117 ierr = PetscFree(a);CHKERRQ(ierr); 118 119 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 120 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 121 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 122 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 123 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr); 124 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 125 PetscFunctionReturn(0); 126 } 127 128 #undef __FUNCT__ 129 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 130 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op) 131 { 132 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 133 PetscErrorCode ierr; 134 135 PetscFunctionBegin; 136 switch (op) { 137 case MAT_ROW_ORIENTED: 138 a->roworiented = PETSC_TRUE; 139 break; 140 case MAT_COLUMN_ORIENTED: 141 a->roworiented = PETSC_FALSE; 142 break; 143 case MAT_COLUMNS_SORTED: 144 a->sorted = PETSC_TRUE; 145 break; 146 case MAT_COLUMNS_UNSORTED: 147 a->sorted = PETSC_FALSE; 148 break; 149 case MAT_KEEP_ZEROED_ROWS: 150 a->keepzeroedrows = PETSC_TRUE; 151 break; 152 case MAT_NO_NEW_NONZERO_LOCATIONS: 153 a->nonew = 1; 154 break; 155 case MAT_NEW_NONZERO_LOCATION_ERR: 156 a->nonew = -1; 157 break; 158 case MAT_NEW_NONZERO_ALLOCATION_ERR: 159 a->nonew = -2; 160 break; 161 case MAT_YES_NEW_NONZERO_LOCATIONS: 162 a->nonew = 0; 163 break; 164 case MAT_ROWS_SORTED: 165 case MAT_ROWS_UNSORTED: 166 case MAT_YES_NEW_DIAGONALS: 167 case MAT_IGNORE_OFF_PROC_ENTRIES: 168 case MAT_USE_HASH_TABLE: 169 ierr = PetscLogInfo((A,"MatSetOption_SeqSBAIJ:Option ignored\n"));CHKERRQ(ierr); 170 break; 171 case MAT_NO_NEW_DIAGONALS: 172 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 173 case MAT_NOT_SYMMETRIC: 174 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 175 case MAT_HERMITIAN: 176 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 177 case MAT_SYMMETRIC: 178 case MAT_STRUCTURALLY_SYMMETRIC: 179 case MAT_NOT_HERMITIAN: 180 case MAT_SYMMETRY_ETERNAL: 181 case MAT_NOT_SYMMETRY_ETERNAL: 182 case MAT_IGNORE_LOWER_TRIANGULAR: 183 a->ignore_ltriangular = PETSC_TRUE; 184 break; 185 case MAT_ERROR_LOWER_TRIANGULAR: 186 a->ignore_ltriangular = PETSC_FALSE; 187 break; 188 default: 189 SETERRQ(PETSC_ERR_SUP,"unknown option"); 190 } 191 PetscFunctionReturn(0); 192 } 193 194 #undef __FUNCT__ 195 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 196 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v) 197 { 198 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 199 PetscErrorCode ierr; 200 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2; 201 MatScalar *aa,*aa_i; 202 PetscScalar *v_i; 203 204 PetscFunctionBegin; 205 bs = A->bs; 206 ai = a->i; 207 aj = a->j; 208 aa = a->a; 209 bs2 = a->bs2; 210 211 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row); 212 213 bn = row/bs; /* Block number */ 214 bp = row % bs; /* Block position */ 215 M = ai[bn+1] - ai[bn]; 216 *ncols = bs*M; 217 218 if (v) { 219 *v = 0; 220 if (*ncols) { 221 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 222 for (i=0; i<M; i++) { /* for each block in the block row */ 223 v_i = *v + i*bs; 224 aa_i = aa + bs2*(ai[bn] + i); 225 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 226 } 227 } 228 } 229 230 if (cols) { 231 *cols = 0; 232 if (*ncols) { 233 ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr); 234 for (i=0; i<M; i++) { /* for each block in the block row */ 235 cols_i = *cols + i*bs; 236 itmp = bs*aj[ai[bn] + i]; 237 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 238 } 239 } 240 } 241 242 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 243 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 244 #ifdef column_search 245 v_i = *v + M*bs; 246 cols_i = *cols + M*bs; 247 for (i=0; i<bn; i++){ /* for each block row */ 248 M = ai[i+1] - ai[i]; 249 for (j=0; j<M; j++){ 250 itmp = aj[ai[i] + j]; /* block column value */ 251 if (itmp == bn){ 252 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 253 for (k=0; k<bs; k++) { 254 *cols_i++ = i*bs+k; 255 *v_i++ = aa_i[k]; 256 } 257 *ncols += bs; 258 break; 259 } 260 } 261 } 262 #endif 263 PetscFunctionReturn(0); 264 } 265 266 #undef __FUNCT__ 267 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 268 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 269 { 270 PetscErrorCode ierr; 271 272 PetscFunctionBegin; 273 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 274 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 275 PetscFunctionReturn(0); 276 } 277 278 #undef __FUNCT__ 279 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 280 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B) 281 { 282 PetscErrorCode ierr; 283 PetscFunctionBegin; 284 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 285 PetscFunctionReturn(0); 286 } 287 288 #undef __FUNCT__ 289 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 290 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 291 { 292 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 293 PetscErrorCode ierr; 294 PetscInt i,j,bs = A->bs,k,l,bs2=a->bs2; 295 const char *name; 296 PetscViewerFormat format; 297 298 PetscFunctionBegin; 299 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 300 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 301 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 302 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 303 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 304 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 305 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 306 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 307 for (i=0; i<a->mbs; i++) { 308 for (j=0; j<bs; j++) { 309 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 310 for (k=a->i[i]; k<a->i[i+1]; k++) { 311 for (l=0; l<bs; l++) { 312 #if defined(PETSC_USE_COMPLEX) 313 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 314 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 315 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 316 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 317 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 318 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 319 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 320 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 321 } 322 #else 323 if (a->a[bs2*k + l*bs + j] != 0.0) { 324 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 325 } 326 #endif 327 } 328 } 329 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 330 } 331 } 332 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 333 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 334 PetscFunctionReturn(0); 335 } else { 336 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 337 for (i=0; i<a->mbs; i++) { 338 for (j=0; j<bs; j++) { 339 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 340 for (k=a->i[i]; k<a->i[i+1]; k++) { 341 for (l=0; l<bs; l++) { 342 #if defined(PETSC_USE_COMPLEX) 343 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 344 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 345 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 346 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 347 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 348 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 349 } else { 350 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 351 } 352 #else 353 ierr = PetscViewerASCIIPrintf(viewer," %D %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 354 #endif 355 } 356 } 357 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 358 } 359 } 360 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 361 } 362 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 363 PetscFunctionReturn(0); 364 } 365 366 #undef __FUNCT__ 367 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 368 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 369 { 370 Mat A = (Mat) Aa; 371 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 372 PetscErrorCode ierr; 373 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2; 374 PetscMPIInt rank; 375 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 376 MatScalar *aa; 377 MPI_Comm comm; 378 PetscViewer viewer; 379 380 PetscFunctionBegin; 381 /* 382 This is nasty. If this is called from an originally parallel matrix 383 then all processes call this,but only the first has the matrix so the 384 rest should return immediately. 385 */ 386 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 387 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 388 if (rank) PetscFunctionReturn(0); 389 390 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 391 392 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 393 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 394 395 /* loop over matrix elements drawing boxes */ 396 color = PETSC_DRAW_BLUE; 397 for (i=0,row=0; i<mbs; i++,row+=bs) { 398 for (j=a->i[i]; j<a->i[i+1]; j++) { 399 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 400 x_l = a->j[j]*bs; x_r = x_l + 1.0; 401 aa = a->a + j*bs2; 402 for (k=0; k<bs; k++) { 403 for (l=0; l<bs; l++) { 404 if (PetscRealPart(*aa++) >= 0.) continue; 405 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 406 } 407 } 408 } 409 } 410 color = PETSC_DRAW_CYAN; 411 for (i=0,row=0; i<mbs; i++,row+=bs) { 412 for (j=a->i[i]; j<a->i[i+1]; j++) { 413 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 414 x_l = a->j[j]*bs; x_r = x_l + 1.0; 415 aa = a->a + j*bs2; 416 for (k=0; k<bs; k++) { 417 for (l=0; l<bs; l++) { 418 if (PetscRealPart(*aa++) != 0.) continue; 419 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 420 } 421 } 422 } 423 } 424 425 color = PETSC_DRAW_RED; 426 for (i=0,row=0; i<mbs; i++,row+=bs) { 427 for (j=a->i[i]; j<a->i[i+1]; j++) { 428 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 429 x_l = a->j[j]*bs; x_r = x_l + 1.0; 430 aa = a->a + j*bs2; 431 for (k=0; k<bs; k++) { 432 for (l=0; l<bs; l++) { 433 if (PetscRealPart(*aa++) <= 0.) continue; 434 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 435 } 436 } 437 } 438 } 439 PetscFunctionReturn(0); 440 } 441 442 #undef __FUNCT__ 443 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 444 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 445 { 446 PetscErrorCode ierr; 447 PetscReal xl,yl,xr,yr,w,h; 448 PetscDraw draw; 449 PetscTruth isnull; 450 451 PetscFunctionBegin; 452 453 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 454 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 455 456 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 457 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 458 xr += w; yr += h; xl = -w; yl = -h; 459 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 460 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 461 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 462 PetscFunctionReturn(0); 463 } 464 465 #undef __FUNCT__ 466 #define __FUNCT__ "MatView_SeqSBAIJ" 467 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 468 { 469 PetscErrorCode ierr; 470 PetscTruth iascii,isdraw; 471 472 PetscFunctionBegin; 473 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 474 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 475 if (iascii){ 476 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 477 } else if (isdraw) { 478 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 479 } else { 480 Mat B; 481 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 482 ierr = MatView(B,viewer);CHKERRQ(ierr); 483 ierr = MatDestroy(B);CHKERRQ(ierr); 484 } 485 PetscFunctionReturn(0); 486 } 487 488 489 #undef __FUNCT__ 490 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 491 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 492 { 493 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 494 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 495 PetscInt *ai = a->i,*ailen = a->ilen; 496 PetscInt brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2; 497 MatScalar *ap,*aa = a->a,zero = 0.0; 498 499 PetscFunctionBegin; 500 for (k=0; k<m; k++) { /* loop over rows */ 501 row = im[k]; brow = row/bs; 502 if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row); 503 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 504 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 505 nrow = ailen[brow]; 506 for (l=0; l<n; l++) { /* loop over columns */ 507 if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]); 508 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1); 509 col = in[l] ; 510 bcol = col/bs; 511 cidx = col%bs; 512 ridx = row%bs; 513 high = nrow; 514 low = 0; /* assume unsorted */ 515 while (high-low > 5) { 516 t = (low+high)/2; 517 if (rp[t] > bcol) high = t; 518 else low = t; 519 } 520 for (i=low; i<high; i++) { 521 if (rp[i] > bcol) break; 522 if (rp[i] == bcol) { 523 *v++ = ap[bs2*i+bs*cidx+ridx]; 524 goto finished; 525 } 526 } 527 *v++ = zero; 528 finished:; 529 } 530 } 531 PetscFunctionReturn(0); 532 } 533 534 535 #undef __FUNCT__ 536 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 537 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 538 { 539 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 540 PetscErrorCode ierr; 541 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 542 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 543 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval; 544 PetscTruth roworiented=a->roworiented; 545 const MatScalar *value = v; 546 MatScalar *ap,*aa = a->a,*bap; 547 548 PetscFunctionBegin; 549 if (roworiented) { 550 stepval = (n-1)*bs; 551 } else { 552 stepval = (m-1)*bs; 553 } 554 for (k=0; k<m; k++) { /* loop over added rows */ 555 row = im[k]; 556 if (row < 0) continue; 557 #if defined(PETSC_USE_DEBUG) 558 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 559 #endif 560 rp = aj + ai[row]; 561 ap = aa + bs2*ai[row]; 562 rmax = imax[row]; 563 nrow = ailen[row]; 564 low = 0; 565 high = nrow; 566 for (l=0; l<n; l++) { /* loop over added columns */ 567 if (in[l] < 0) continue; 568 col = in[l]; 569 #if defined(PETSC_USE_DEBUG) 570 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1); 571 #endif 572 if (col < row) continue; /* ignore lower triangular block */ 573 if (roworiented) { 574 value = v + k*(stepval+bs)*bs + l*bs; 575 } else { 576 value = v + l*(stepval+bs)*bs + k*bs; 577 } 578 if (col <= lastcol) low = 0; else high = nrow; 579 lastcol = col; 580 while (high-low > 7) { 581 t = (low+high)/2; 582 if (rp[t] > col) high = t; 583 else low = t; 584 } 585 for (i=low; i<high; i++) { 586 if (rp[i] > col) break; 587 if (rp[i] == col) { 588 bap = ap + bs2*i; 589 if (roworiented) { 590 if (is == ADD_VALUES) { 591 for (ii=0; ii<bs; ii++,value+=stepval) { 592 for (jj=ii; jj<bs2; jj+=bs) { 593 bap[jj] += *value++; 594 } 595 } 596 } else { 597 for (ii=0; ii<bs; ii++,value+=stepval) { 598 for (jj=ii; jj<bs2; jj+=bs) { 599 bap[jj] = *value++; 600 } 601 } 602 } 603 } else { 604 if (is == ADD_VALUES) { 605 for (ii=0; ii<bs; ii++,value+=stepval) { 606 for (jj=0; jj<bs; jj++) { 607 *bap++ += *value++; 608 } 609 } 610 } else { 611 for (ii=0; ii<bs; ii++,value+=stepval) { 612 for (jj=0; jj<bs; jj++) { 613 *bap++ = *value++; 614 } 615 } 616 } 617 } 618 goto noinsert2; 619 } 620 } 621 if (nonew == 1) goto noinsert2; 622 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 623 MatSeqXAIJReallocateAIJ(a,bs2,nrow,row,col,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew); 624 N = nrow++ - 1; 625 /* shift up all the later entries in this row */ 626 for (ii=N; ii>=i; ii--) { 627 rp[ii+1] = rp[ii]; 628 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 629 } 630 if (N >= i) { 631 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 632 } 633 rp[i] = col; 634 bap = ap + bs2*i; 635 if (roworiented) { 636 for (ii=0; ii<bs; ii++,value+=stepval) { 637 for (jj=ii; jj<bs2; jj+=bs) { 638 bap[jj] = *value++; 639 } 640 } 641 } else { 642 for (ii=0; ii<bs; ii++,value+=stepval) { 643 for (jj=0; jj<bs; jj++) { 644 *bap++ = *value++; 645 } 646 } 647 } 648 noinsert2:; 649 low = i; 650 } 651 ailen[row] = nrow; 652 } 653 PetscFunctionReturn(0); 654 } 655 656 #undef __FUNCT__ 657 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 658 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 659 { 660 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 661 PetscErrorCode ierr; 662 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 663 PetscInt m = A->m,*ip,N,*ailen = a->ilen; 664 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 665 MatScalar *aa = a->a,*ap; 666 667 PetscFunctionBegin; 668 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 669 670 if (m) rmax = ailen[0]; 671 for (i=1; i<mbs; i++) { 672 /* move each row back by the amount of empty slots (fshift) before it*/ 673 fshift += imax[i-1] - ailen[i-1]; 674 rmax = PetscMax(rmax,ailen[i]); 675 if (fshift) { 676 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 677 N = ailen[i]; 678 for (j=0; j<N; j++) { 679 ip[j-fshift] = ip[j]; 680 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 681 } 682 } 683 ai[i] = ai[i-1] + ailen[i-1]; 684 } 685 if (mbs) { 686 fshift += imax[mbs-1] - ailen[mbs-1]; 687 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 688 } 689 /* reset ilen and imax for each row */ 690 for (i=0; i<mbs; i++) { 691 ailen[i] = imax[i] = ai[i+1] - ai[i]; 692 } 693 a->nz = ai[mbs]; 694 695 /* diagonals may have moved, reset it */ 696 if (a->diag) { 697 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 698 } 699 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n", 700 m,A->m,A->bs,fshift*bs2,a->nz*bs2));CHKERRQ(ierr); 701 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs));CHKERRQ(ierr); 702 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %D\n",rmax));CHKERRQ(ierr); 703 a->reallocs = 0; 704 A->info.nz_unneeded = (PetscReal)fshift*bs2; 705 PetscFunctionReturn(0); 706 } 707 708 /* 709 This function returns an array of flags which indicate the locations of contiguous 710 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 711 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 712 Assume: sizes should be long enough to hold all the values. 713 */ 714 #undef __FUNCT__ 715 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 716 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 717 { 718 PetscInt i,j,k,row; 719 PetscTruth flg; 720 721 PetscFunctionBegin; 722 for (i=0,j=0; i<n; j++) { 723 row = idx[i]; 724 if (row%bs!=0) { /* Not the begining of a block */ 725 sizes[j] = 1; 726 i++; 727 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 728 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 729 i++; 730 } else { /* Begining of the block, so check if the complete block exists */ 731 flg = PETSC_TRUE; 732 for (k=1; k<bs; k++) { 733 if (row+k != idx[i+k]) { /* break in the block */ 734 flg = PETSC_FALSE; 735 break; 736 } 737 } 738 if (flg) { /* No break in the bs */ 739 sizes[j] = bs; 740 i+= bs; 741 } else { 742 sizes[j] = 1; 743 i++; 744 } 745 } 746 } 747 *bs_max = j; 748 PetscFunctionReturn(0); 749 } 750 751 752 /* Only add/insert a(i,j) with i<=j (blocks). 753 Any a(i,j) with i>j input by user is ingored. 754 */ 755 756 #undef __FUNCT__ 757 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 758 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 759 { 760 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 761 PetscErrorCode ierr; 762 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 763 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 764 PetscInt *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol; 765 PetscInt ridx,cidx,bs2=a->bs2; 766 MatScalar *ap,value,*aa=a->a,*bap; 767 768 PetscFunctionBegin; 769 for (k=0; k<m; k++) { /* loop over added rows */ 770 row = im[k]; /* row number */ 771 brow = row/bs; /* block row number */ 772 if (row < 0) continue; 773 #if defined(PETSC_USE_DEBUG) 774 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 775 #endif 776 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 777 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 778 rmax = imax[brow]; /* maximum space allocated for this row */ 779 nrow = ailen[brow]; /* actual length of this row */ 780 low = 0; 781 782 for (l=0; l<n; l++) { /* loop over added columns */ 783 if (in[l] < 0) continue; 784 #if defined(PETSC_USE_DEBUG) 785 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->m-1); 786 #endif 787 col = in[l]; 788 bcol = col/bs; /* block col number */ 789 790 if (brow > bcol) { 791 if (a->ignore_ltriangular){ 792 continue; /* ignore lower triangular values */ 793 } else { 794 SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR)"); 795 } 796 } 797 798 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 799 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 800 /* element value a(k,l) */ 801 if (roworiented) { 802 value = v[l + k*n]; 803 } else { 804 value = v[k + l*m]; 805 } 806 807 /* move pointer bap to a(k,l) quickly and add/insert value */ 808 if (col <= lastcol) low = 0; high = nrow; 809 lastcol = col; 810 while (high-low > 7) { 811 t = (low+high)/2; 812 if (rp[t] > bcol) high = t; 813 else low = t; 814 } 815 for (i=low; i<high; i++) { 816 if (rp[i] > bcol) break; 817 if (rp[i] == bcol) { 818 bap = ap + bs2*i + bs*cidx + ridx; 819 if (is == ADD_VALUES) *bap += value; 820 else *bap = value; 821 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 822 if (brow == bcol && ridx < cidx){ 823 bap = ap + bs2*i + bs*ridx + cidx; 824 if (is == ADD_VALUES) *bap += value; 825 else *bap = value; 826 } 827 goto noinsert1; 828 } 829 } 830 831 if (nonew == 1) goto noinsert1; 832 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 833 MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew); 834 835 N = nrow++ - 1; 836 /* shift up all the later entries in this row */ 837 for (ii=N; ii>=i; ii--) { 838 rp[ii+1] = rp[ii]; 839 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 840 } 841 if (N>=i) { 842 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 843 } 844 rp[i] = bcol; 845 ap[bs2*i + bs*cidx + ridx] = value; 846 noinsert1:; 847 low = i; 848 } 849 } /* end of loop over added columns */ 850 ailen[brow] = nrow; 851 } /* end of loop over added rows */ 852 PetscFunctionReturn(0); 853 } 854 855 EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*); 856 EXTERN PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*); 857 EXTERN PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat,PetscInt,IS[],PetscInt); 858 EXTERN PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,PetscInt,MatReuse,Mat*); 859 EXTERN PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat*[]); 860 EXTERN PetscErrorCode MatScale_SeqSBAIJ(Mat,PetscScalar); 861 EXTERN PetscErrorCode MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 862 EXTERN PetscErrorCode MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 863 EXTERN PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat,Vec); 864 EXTERN PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 865 EXTERN PetscErrorCode MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 866 EXTERN PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat); 867 EXTERN PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat,Vec); 868 869 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 870 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 871 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 872 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 873 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 874 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 875 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 876 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 877 878 EXTERN PetscErrorCode MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs); 879 880 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 881 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 882 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 883 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 884 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 885 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 886 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 887 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 888 889 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,MatFactorInfo*,Mat*); 890 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,MatFactorInfo*,Mat*); 891 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,MatFactorInfo*,Mat*); 892 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,MatFactorInfo*,Mat*); 893 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,MatFactorInfo*,Mat*); 894 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,MatFactorInfo*,Mat*); 895 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,MatFactorInfo*,Mat*); 896 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,MatFactorInfo*,Mat*); 897 EXTERN PetscErrorCode MatGetInertia_SeqSBAIJ(Mat,PetscInt*,PetscInt*,PetscInt*); 898 899 EXTERN PetscErrorCode MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 900 EXTERN PetscErrorCode MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 901 EXTERN PetscErrorCode MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 902 EXTERN PetscErrorCode MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 903 EXTERN PetscErrorCode MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 904 EXTERN PetscErrorCode MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 905 EXTERN PetscErrorCode MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 906 EXTERN PetscErrorCode MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 907 908 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 909 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 910 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 911 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 912 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 913 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 914 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 915 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 916 917 #ifdef HAVE_MatICCFactor 918 /* modified from MatILUFactor_SeqSBAIJ, needs further work! */ 919 #undef __FUNCT__ 920 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 921 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 922 { 923 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 924 Mat outA; 925 PetscErrorCode ierr; 926 PetscTruth row_identity,col_identity; 927 928 PetscFunctionBegin; 929 outA = inA; 930 inA->factor = FACTOR_CHOLESKY; 931 932 if (!a->diag) { 933 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 934 } 935 /* 936 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 937 for ILU(0) factorization with natural ordering 938 */ 939 switch (a->bs) { 940 case 1: 941 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 942 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 943 inA->ops->solves = MatSolves_SeqSBAIJ_1; 944 ierr = PetscLoginfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"));CHKERRQ(ierr); 945 case 2: 946 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 947 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 948 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 949 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"));CHKERRQ(ierr); 950 break; 951 case 3: 952 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 953 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 954 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 955 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"));CHKERRQ(ierr); 956 break; 957 case 4: 958 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 959 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 960 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 961 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"));CHKERRQ(ierr); 962 break; 963 case 5: 964 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 965 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 966 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 967 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"));CHKERRQ(ierr); 968 break; 969 case 6: 970 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 971 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 972 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 973 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"));CHKERRQ(ierr); 974 break; 975 case 7: 976 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 977 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 978 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 979 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"));CHKERRQ(ierr); 980 break; 981 default: 982 a->row = row; 983 a->icol = col; 984 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 985 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 986 987 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 988 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 989 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 990 991 if (!a->solve_work) { 992 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 993 ierr = PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 994 } 995 } 996 997 ierr = MatCholeskyFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 998 PetscFunctionReturn(0); 999 } 1000 #endif 1001 1002 #undef __FUNCT__ 1003 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1004 PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat A) 1005 { 1006 static PetscTruth called = PETSC_FALSE; 1007 MPI_Comm comm = A->comm; 1008 PetscErrorCode ierr; 1009 1010 PetscFunctionBegin; 1011 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1012 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1013 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1014 ierr = (*PetscHelpPrintf)(comm," -mat_ignore_ltriangular: Ignore lower triangular values set by user\n");CHKERRQ(ierr); 1015 PetscFunctionReturn(0); 1016 } 1017 1018 EXTERN_C_BEGIN 1019 #undef __FUNCT__ 1020 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1021 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices) 1022 { 1023 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1024 PetscInt i,nz,n; 1025 1026 PetscFunctionBegin; 1027 nz = baij->maxnz; 1028 n = mat->n; 1029 for (i=0; i<nz; i++) { 1030 baij->j[i] = indices[i]; 1031 } 1032 baij->nz = nz; 1033 for (i=0; i<n; i++) { 1034 baij->ilen[i] = baij->imax[i]; 1035 } 1036 PetscFunctionReturn(0); 1037 } 1038 EXTERN_C_END 1039 1040 #undef __FUNCT__ 1041 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1042 /*@ 1043 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1044 in the matrix. 1045 1046 Input Parameters: 1047 + mat - the SeqSBAIJ matrix 1048 - indices - the column indices 1049 1050 Level: advanced 1051 1052 Notes: 1053 This can be called if you have precomputed the nonzero structure of the 1054 matrix and want to provide it to the matrix object to improve the performance 1055 of the MatSetValues() operation. 1056 1057 You MUST have set the correct numbers of nonzeros per row in the call to 1058 MatCreateSeqSBAIJ(), and the columns indices MUST be sorted. 1059 1060 MUST be called before any calls to MatSetValues() 1061 1062 .seealso: MatCreateSeqSBAIJ 1063 @*/ 1064 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1065 { 1066 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1067 1068 PetscFunctionBegin; 1069 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1070 PetscValidPointer(indices,2); 1071 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1072 if (f) { 1073 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1074 } else { 1075 SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices"); 1076 } 1077 PetscFunctionReturn(0); 1078 } 1079 1080 #undef __FUNCT__ 1081 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1082 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A) 1083 { 1084 PetscErrorCode ierr; 1085 1086 PetscFunctionBegin; 1087 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1088 PetscFunctionReturn(0); 1089 } 1090 1091 #undef __FUNCT__ 1092 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1093 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1094 { 1095 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1096 PetscFunctionBegin; 1097 *array = a->a; 1098 PetscFunctionReturn(0); 1099 } 1100 1101 #undef __FUNCT__ 1102 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1103 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1104 { 1105 PetscFunctionBegin; 1106 PetscFunctionReturn(0); 1107 } 1108 1109 #include "petscblaslapack.h" 1110 #undef __FUNCT__ 1111 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1112 PetscErrorCode MatAXPY_SeqSBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1113 { 1114 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1115 PetscErrorCode ierr; 1116 PetscInt i,bs=Y->bs,bs2,j; 1117 PetscBLASInt bnz = (PetscBLASInt)x->nz,one = 1; 1118 1119 PetscFunctionBegin; 1120 if (str == SAME_NONZERO_PATTERN) { 1121 PetscScalar alpha = a; 1122 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1123 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1124 if (y->xtoy && y->XtoY != X) { 1125 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1126 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1127 } 1128 if (!y->xtoy) { /* get xtoy */ 1129 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1130 y->XtoY = X; 1131 } 1132 bs2 = bs*bs; 1133 for (i=0; i<x->nz; i++) { 1134 j = 0; 1135 while (j < bs2){ 1136 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 1137 j++; 1138 } 1139 } 1140 ierr = PetscLogInfo((0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr); 1141 } else { 1142 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1143 } 1144 PetscFunctionReturn(0); 1145 } 1146 1147 #undef __FUNCT__ 1148 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1149 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1150 { 1151 PetscFunctionBegin; 1152 *flg = PETSC_TRUE; 1153 PetscFunctionReturn(0); 1154 } 1155 1156 #undef __FUNCT__ 1157 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1158 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1159 { 1160 PetscFunctionBegin; 1161 *flg = PETSC_TRUE; 1162 PetscFunctionReturn(0); 1163 } 1164 1165 #undef __FUNCT__ 1166 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1167 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg) 1168 { 1169 PetscFunctionBegin; 1170 *flg = PETSC_FALSE; 1171 PetscFunctionReturn(0); 1172 } 1173 1174 /* -------------------------------------------------------------------*/ 1175 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1176 MatGetRow_SeqSBAIJ, 1177 MatRestoreRow_SeqSBAIJ, 1178 MatMult_SeqSBAIJ_N, 1179 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1180 MatMult_SeqSBAIJ_N, /* transpose versions are same as non-transpose versions */ 1181 MatMultAdd_SeqSBAIJ_N, 1182 MatSolve_SeqSBAIJ_N, 1183 0, 1184 0, 1185 /*10*/ 0, 1186 0, 1187 MatCholeskyFactor_SeqSBAIJ, 1188 MatRelax_SeqSBAIJ, 1189 MatTranspose_SeqSBAIJ, 1190 /*15*/ MatGetInfo_SeqSBAIJ, 1191 MatEqual_SeqSBAIJ, 1192 MatGetDiagonal_SeqSBAIJ, 1193 MatDiagonalScale_SeqSBAIJ, 1194 MatNorm_SeqSBAIJ, 1195 /*20*/ 0, 1196 MatAssemblyEnd_SeqSBAIJ, 1197 0, 1198 MatSetOption_SeqSBAIJ, 1199 MatZeroEntries_SeqSBAIJ, 1200 /*25*/ 0, 1201 0, 1202 0, 1203 MatCholeskyFactorSymbolic_SeqSBAIJ, 1204 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1205 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1206 0, 1207 MatICCFactorSymbolic_SeqSBAIJ, 1208 MatGetArray_SeqSBAIJ, 1209 MatRestoreArray_SeqSBAIJ, 1210 /*35*/ MatDuplicate_SeqSBAIJ, 1211 0, 1212 0, 1213 0, 1214 0, 1215 /*40*/ MatAXPY_SeqSBAIJ, 1216 MatGetSubMatrices_SeqSBAIJ, 1217 MatIncreaseOverlap_SeqSBAIJ, 1218 MatGetValues_SeqSBAIJ, 1219 0, 1220 /*45*/ MatPrintHelp_SeqSBAIJ, 1221 MatScale_SeqSBAIJ, 1222 0, 1223 0, 1224 0, 1225 /*50*/ 0, 1226 MatGetRowIJ_SeqSBAIJ, 1227 MatRestoreRowIJ_SeqSBAIJ, 1228 0, 1229 0, 1230 /*55*/ 0, 1231 0, 1232 0, 1233 0, 1234 MatSetValuesBlocked_SeqSBAIJ, 1235 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1236 0, 1237 0, 1238 MatGetPetscMaps_Petsc, 1239 0, 1240 /*65*/ 0, 1241 0, 1242 0, 1243 0, 1244 0, 1245 /*70*/ MatGetRowMax_SeqSBAIJ, 1246 0, 1247 0, 1248 0, 1249 0, 1250 /*75*/ 0, 1251 0, 1252 0, 1253 0, 1254 0, 1255 /*80*/ 0, 1256 0, 1257 0, 1258 #if !defined(PETSC_USE_COMPLEX) 1259 MatGetInertia_SeqSBAIJ, 1260 #else 1261 0, 1262 #endif 1263 MatLoad_SeqSBAIJ, 1264 /*85*/ MatIsSymmetric_SeqSBAIJ, 1265 MatIsHermitian_SeqSBAIJ, 1266 MatIsStructurallySymmetric_SeqSBAIJ, 1267 0, 1268 0, 1269 /*90*/ 0, 1270 0, 1271 0, 1272 0, 1273 0, 1274 /*95*/ 0, 1275 0, 1276 0, 1277 0}; 1278 1279 EXTERN_C_BEGIN 1280 #undef __FUNCT__ 1281 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1282 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqSBAIJ(Mat mat) 1283 { 1284 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1285 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 1286 PetscErrorCode ierr; 1287 1288 PetscFunctionBegin; 1289 if (aij->nonew != 1) { 1290 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1291 } 1292 1293 /* allocate space for values if not already there */ 1294 if (!aij->saved_values) { 1295 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1296 } 1297 1298 /* copy values over */ 1299 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1300 PetscFunctionReturn(0); 1301 } 1302 EXTERN_C_END 1303 1304 EXTERN_C_BEGIN 1305 #undef __FUNCT__ 1306 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1307 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqSBAIJ(Mat mat) 1308 { 1309 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1310 PetscErrorCode ierr; 1311 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 1312 1313 PetscFunctionBegin; 1314 if (aij->nonew != 1) { 1315 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1316 } 1317 if (!aij->saved_values) { 1318 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1319 } 1320 1321 /* copy values over */ 1322 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1323 PetscFunctionReturn(0); 1324 } 1325 EXTERN_C_END 1326 1327 EXTERN_C_BEGIN 1328 #undef __FUNCT__ 1329 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1330 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 1331 { 1332 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1333 PetscErrorCode ierr; 1334 PetscInt i,mbs,bs2; 1335 PetscTruth skipallocation = PETSC_FALSE,flg; 1336 1337 PetscFunctionBegin; 1338 B->preallocated = PETSC_TRUE; 1339 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1340 mbs = B->m/bs; 1341 bs2 = bs*bs; 1342 1343 if (mbs*bs != B->m) { 1344 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1345 } 1346 1347 if (nz == MAT_SKIP_ALLOCATION) { 1348 skipallocation = PETSC_TRUE; 1349 nz = 0; 1350 } 1351 1352 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1353 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 1354 if (nnz) { 1355 for (i=0; i<mbs; i++) { 1356 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 1357 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs); 1358 } 1359 } 1360 1361 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1362 if (!flg) { 1363 switch (bs) { 1364 case 1: 1365 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1366 B->ops->solve = MatSolve_SeqSBAIJ_1; 1367 B->ops->solves = MatSolves_SeqSBAIJ_1; 1368 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1369 B->ops->mult = MatMult_SeqSBAIJ_1; 1370 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1371 B->ops->multtranspose = MatMult_SeqSBAIJ_1; 1372 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_1; 1373 break; 1374 case 2: 1375 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1376 B->ops->solve = MatSolve_SeqSBAIJ_2; 1377 B->ops->solvetranspose = MatSolve_SeqSBAIJ_2; 1378 B->ops->mult = MatMult_SeqSBAIJ_2; 1379 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1380 B->ops->multtranspose = MatMult_SeqSBAIJ_2; 1381 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_2; 1382 break; 1383 case 3: 1384 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1385 B->ops->solve = MatSolve_SeqSBAIJ_3; 1386 B->ops->solvetranspose = MatSolve_SeqSBAIJ_3; 1387 B->ops->mult = MatMult_SeqSBAIJ_3; 1388 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1389 B->ops->multtranspose = MatMult_SeqSBAIJ_3; 1390 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_3; 1391 break; 1392 case 4: 1393 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1394 B->ops->solve = MatSolve_SeqSBAIJ_4; 1395 B->ops->solvetranspose = MatSolve_SeqSBAIJ_4; 1396 B->ops->mult = MatMult_SeqSBAIJ_4; 1397 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1398 B->ops->multtranspose = MatMult_SeqSBAIJ_4; 1399 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_4; 1400 break; 1401 case 5: 1402 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1403 B->ops->solve = MatSolve_SeqSBAIJ_5; 1404 B->ops->solvetranspose = MatSolve_SeqSBAIJ_5; 1405 B->ops->mult = MatMult_SeqSBAIJ_5; 1406 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1407 B->ops->multtranspose = MatMult_SeqSBAIJ_5; 1408 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_5; 1409 break; 1410 case 6: 1411 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1412 B->ops->solve = MatSolve_SeqSBAIJ_6; 1413 B->ops->solvetranspose = MatSolve_SeqSBAIJ_6; 1414 B->ops->mult = MatMult_SeqSBAIJ_6; 1415 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1416 B->ops->multtranspose = MatMult_SeqSBAIJ_6; 1417 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_6; 1418 break; 1419 case 7: 1420 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1421 B->ops->solve = MatSolve_SeqSBAIJ_7; 1422 B->ops->solvetranspose = MatSolve_SeqSBAIJ_7; 1423 B->ops->mult = MatMult_SeqSBAIJ_7; 1424 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1425 B->ops->multtranspose = MatMult_SeqSBAIJ_7; 1426 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_7; 1427 break; 1428 } 1429 } 1430 1431 b->mbs = mbs; 1432 b->nbs = mbs; 1433 if (!skipallocation) { 1434 /* b->ilen will count nonzeros in each block row so far. */ 1435 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 1436 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1437 if (!nnz) { 1438 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1439 else if (nz <= 0) nz = 1; 1440 for (i=0; i<mbs; i++) { 1441 b->imax[i] = nz; 1442 } 1443 nz = nz*mbs; /* total nz */ 1444 } else { 1445 nz = 0; 1446 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1447 } 1448 /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1449 1450 /* allocate the matrix space */ 1451 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->m+1,PetscInt,&b->i);CHKERRQ(ierr); 1452 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1453 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1454 b->singlemalloc = PETSC_TRUE; 1455 1456 /* pointer to beginning of each row */ 1457 b->i[0] = 0; 1458 for (i=1; i<mbs+1; i++) { 1459 b->i[i] = b->i[i-1] + b->imax[i-1]; 1460 } 1461 } 1462 1463 B->bs = bs; 1464 b->bs2 = bs2; 1465 b->nz = 0; 1466 b->maxnz = nz*bs2; 1467 1468 b->inew = 0; 1469 b->jnew = 0; 1470 b->anew = 0; 1471 b->a2anew = 0; 1472 b->permute = PETSC_FALSE; 1473 PetscFunctionReturn(0); 1474 } 1475 EXTERN_C_END 1476 1477 EXTERN_C_BEGIN 1478 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 1479 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqBAIJ(Mat, MatType,MatReuse,Mat*); 1480 EXTERN_C_END 1481 1482 /*MC 1483 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1484 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1485 1486 Options Database Keys: 1487 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1488 1489 Level: beginner 1490 1491 .seealso: MatCreateSeqSBAIJ 1492 M*/ 1493 1494 EXTERN_C_BEGIN 1495 #undef __FUNCT__ 1496 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1497 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqSBAIJ(Mat B) 1498 { 1499 Mat_SeqSBAIJ *b; 1500 PetscErrorCode ierr; 1501 PetscMPIInt size; 1502 PetscTruth flg; 1503 1504 PetscFunctionBegin; 1505 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1506 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1507 B->m = B->M = PetscMax(B->m,B->M); 1508 B->n = B->N = PetscMax(B->n,B->N); 1509 1510 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1511 B->data = (void*)b; 1512 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1513 B->ops->destroy = MatDestroy_SeqSBAIJ; 1514 B->ops->view = MatView_SeqSBAIJ; 1515 B->factor = 0; 1516 B->mapping = 0; 1517 b->row = 0; 1518 b->icol = 0; 1519 b->reallocs = 0; 1520 b->saved_values = 0; 1521 1522 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1523 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1524 1525 b->sorted = PETSC_FALSE; 1526 b->roworiented = PETSC_TRUE; 1527 b->nonew = 0; 1528 b->diag = 0; 1529 b->solve_work = 0; 1530 b->mult_work = 0; 1531 B->spptr = 0; 1532 b->keepzeroedrows = PETSC_FALSE; 1533 b->xtoy = 0; 1534 b->XtoY = 0; 1535 1536 b->inew = 0; 1537 b->jnew = 0; 1538 b->anew = 0; 1539 b->a2anew = 0; 1540 b->permute = PETSC_FALSE; 1541 1542 b->ignore_ltriangular = PETSC_FALSE; 1543 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_ignore_lower_triangular",&flg);CHKERRQ(ierr); 1544 if (flg) b->ignore_ltriangular = PETSC_TRUE; 1545 1546 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1547 "MatStoreValues_SeqSBAIJ", 1548 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1549 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1550 "MatRetrieveValues_SeqSBAIJ", 1551 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1552 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1553 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1554 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1555 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C", 1556 "MatConvert_SeqSBAIJ_SeqAIJ", 1557 MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr); 1558 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C", 1559 "MatConvert_SeqSBAIJ_SeqBAIJ", 1560 MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr); 1561 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1562 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1563 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1564 1565 B->symmetric = PETSC_TRUE; 1566 B->structurally_symmetric = PETSC_TRUE; 1567 B->symmetric_set = PETSC_TRUE; 1568 B->structurally_symmetric_set = PETSC_TRUE; 1569 PetscFunctionReturn(0); 1570 } 1571 EXTERN_C_END 1572 1573 #undef __FUNCT__ 1574 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1575 /*@C 1576 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1577 compressed row) format. For good matrix assembly performance the 1578 user should preallocate the matrix storage by setting the parameter nz 1579 (or the array nnz). By setting these parameters accurately, performance 1580 during matrix assembly can be increased by more than a factor of 50. 1581 1582 Collective on Mat 1583 1584 Input Parameters: 1585 + A - the symmetric matrix 1586 . bs - size of block 1587 . nz - number of block nonzeros per block row (same for all rows) 1588 - nnz - array containing the number of block nonzeros in the upper triangular plus 1589 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1590 1591 Options Database Keys: 1592 . -mat_no_unroll - uses code that does not unroll the loops in the 1593 block calculations (much slower) 1594 . -mat_block_size - size of the blocks to use 1595 1596 Level: intermediate 1597 1598 Notes: 1599 Specify the preallocated storage with either nz or nnz (not both). 1600 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1601 allocation. For additional details, see the users manual chapter on 1602 matrices. 1603 1604 If the nnz parameter is given then the nz parameter is ignored 1605 1606 1607 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1608 @*/ 1609 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 1610 { 1611 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 1612 1613 PetscFunctionBegin; 1614 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1615 if (f) { 1616 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1617 } 1618 PetscFunctionReturn(0); 1619 } 1620 1621 #undef __FUNCT__ 1622 #define __FUNCT__ "MatCreateSeqSBAIJ" 1623 /*@C 1624 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1625 compressed row) format. For good matrix assembly performance the 1626 user should preallocate the matrix storage by setting the parameter nz 1627 (or the array nnz). By setting these parameters accurately, performance 1628 during matrix assembly can be increased by more than a factor of 50. 1629 1630 Collective on MPI_Comm 1631 1632 Input Parameters: 1633 + comm - MPI communicator, set to PETSC_COMM_SELF 1634 . bs - size of block 1635 . m - number of rows, or number of columns 1636 . nz - number of block nonzeros per block row (same for all rows) 1637 - nnz - array containing the number of block nonzeros in the upper triangular plus 1638 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1639 1640 Output Parameter: 1641 . A - the symmetric matrix 1642 1643 Options Database Keys: 1644 . -mat_no_unroll - uses code that does not unroll the loops in the 1645 block calculations (much slower) 1646 . -mat_block_size - size of the blocks to use 1647 1648 Level: intermediate 1649 1650 Notes: 1651 1652 Specify the preallocated storage with either nz or nnz (not both). 1653 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1654 allocation. For additional details, see the users manual chapter on 1655 matrices. 1656 1657 If the nnz parameter is given then the nz parameter is ignored 1658 1659 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1660 @*/ 1661 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1662 { 1663 PetscErrorCode ierr; 1664 1665 PetscFunctionBegin; 1666 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1667 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 1668 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1669 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 1670 PetscFunctionReturn(0); 1671 } 1672 1673 #undef __FUNCT__ 1674 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1675 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1676 { 1677 Mat C; 1678 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1679 PetscErrorCode ierr; 1680 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1681 1682 PetscFunctionBegin; 1683 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1684 1685 *B = 0; 1686 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 1687 ierr = MatSetSizes(C,A->m,A->n,A->m,A->n);CHKERRQ(ierr); 1688 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1689 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1690 c = (Mat_SeqSBAIJ*)C->data; 1691 1692 C->preallocated = PETSC_TRUE; 1693 C->factor = A->factor; 1694 c->row = 0; 1695 c->icol = 0; 1696 c->saved_values = 0; 1697 c->keepzeroedrows = a->keepzeroedrows; 1698 C->assembled = PETSC_TRUE; 1699 1700 C->M = A->M; 1701 C->N = A->N; 1702 C->bs = A->bs; 1703 c->bs2 = a->bs2; 1704 c->mbs = a->mbs; 1705 c->nbs = a->nbs; 1706 1707 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr); 1708 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr); 1709 for (i=0; i<mbs; i++) { 1710 c->imax[i] = a->imax[i]; 1711 c->ilen[i] = a->ilen[i]; 1712 } 1713 ierr = PetscLogObjectMemory(C,2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1714 1715 /* allocate the matrix space */ 1716 ierr = PetscMalloc3(bs2*nz,MatScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 1717 c->singlemalloc = PETSC_TRUE; 1718 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1719 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)));CHKERRQ(ierr); 1720 if (mbs > 0) { 1721 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1722 if (cpvalues == MAT_COPY_VALUES) { 1723 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1724 } else { 1725 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1726 } 1727 } 1728 1729 c->sorted = a->sorted; 1730 c->roworiented = a->roworiented; 1731 c->nonew = a->nonew; 1732 1733 if (a->diag) { 1734 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 1735 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1736 for (i=0; i<mbs; i++) { 1737 c->diag[i] = a->diag[i]; 1738 } 1739 } else c->diag = 0; 1740 c->nz = a->nz; 1741 c->maxnz = a->maxnz; 1742 c->solve_work = 0; 1743 c->mult_work = 0; 1744 *B = C; 1745 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1746 PetscFunctionReturn(0); 1747 } 1748 1749 #undef __FUNCT__ 1750 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1751 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer, MatType type,Mat *A) 1752 { 1753 Mat_SeqSBAIJ *a; 1754 Mat B; 1755 PetscErrorCode ierr; 1756 int fd; 1757 PetscMPIInt size; 1758 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 1759 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1760 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 1761 PetscInt *masked,nmask,tmp,bs2,ishift; 1762 PetscScalar *aa; 1763 MPI_Comm comm = ((PetscObject)viewer)->comm; 1764 1765 PetscFunctionBegin; 1766 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1767 bs2 = bs*bs; 1768 1769 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1770 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1771 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1772 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1773 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1774 M = header[1]; N = header[2]; nz = header[3]; 1775 1776 if (header[3] < 0) { 1777 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1778 } 1779 1780 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1781 1782 /* 1783 This code adds extra rows to make sure the number of rows is 1784 divisible by the blocksize 1785 */ 1786 mbs = M/bs; 1787 extra_rows = bs - M + bs*(mbs); 1788 if (extra_rows == bs) extra_rows = 0; 1789 else mbs++; 1790 if (extra_rows) { 1791 ierr = PetscLogInfo((0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr); 1792 } 1793 1794 /* read in row lengths */ 1795 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1796 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1797 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1798 1799 /* read in column indices */ 1800 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1801 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1802 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1803 1804 /* loop over row lengths determining block row lengths */ 1805 ierr = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr); 1806 ierr = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1807 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 1808 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1809 masked = mask + mbs; 1810 rowcount = 0; nzcount = 0; 1811 for (i=0; i<mbs; i++) { 1812 nmask = 0; 1813 for (j=0; j<bs; j++) { 1814 kmax = rowlengths[rowcount]; 1815 for (k=0; k<kmax; k++) { 1816 tmp = jj[nzcount++]/bs; /* block col. index */ 1817 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1818 } 1819 rowcount++; 1820 } 1821 s_browlengths[i] += nmask; 1822 1823 /* zero out the mask elements we set */ 1824 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1825 } 1826 1827 /* create our matrix */ 1828 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 1829 ierr = MatSetSizes(B,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 1830 ierr = MatSetType(B,type);CHKERRQ(ierr); 1831 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,s_browlengths);CHKERRQ(ierr); 1832 a = (Mat_SeqSBAIJ*)B->data; 1833 1834 /* set matrix "i" values */ 1835 a->i[0] = 0; 1836 for (i=1; i<= mbs; i++) { 1837 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1838 a->ilen[i-1] = s_browlengths[i-1]; 1839 } 1840 a->nz = a->i[mbs]; 1841 1842 /* read in nonzero values */ 1843 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1844 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1845 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1846 1847 /* set "a" and "j" values into matrix */ 1848 nzcount = 0; jcount = 0; 1849 for (i=0; i<mbs; i++) { 1850 nzcountb = nzcount; 1851 nmask = 0; 1852 for (j=0; j<bs; j++) { 1853 kmax = rowlengths[i*bs+j]; 1854 for (k=0; k<kmax; k++) { 1855 tmp = jj[nzcount++]/bs; /* block col. index */ 1856 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1857 } 1858 } 1859 /* sort the masked values */ 1860 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1861 1862 /* set "j" values into matrix */ 1863 maskcount = 1; 1864 for (j=0; j<nmask; j++) { 1865 a->j[jcount++] = masked[j]; 1866 mask[masked[j]] = maskcount++; 1867 } 1868 1869 /* set "a" values into matrix */ 1870 ishift = bs2*a->i[i]; 1871 for (j=0; j<bs; j++) { 1872 kmax = rowlengths[i*bs+j]; 1873 for (k=0; k<kmax; k++) { 1874 tmp = jj[nzcountb]/bs ; /* block col. index */ 1875 if (tmp >= i){ 1876 block = mask[tmp] - 1; 1877 point = jj[nzcountb] - bs*tmp; 1878 idx = ishift + bs2*block + j + bs*point; 1879 a->a[idx] = aa[nzcountb]; 1880 } 1881 nzcountb++; 1882 } 1883 } 1884 /* zero out the mask elements we set */ 1885 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1886 } 1887 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1888 1889 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1890 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1891 ierr = PetscFree(aa);CHKERRQ(ierr); 1892 ierr = PetscFree(jj);CHKERRQ(ierr); 1893 ierr = PetscFree(mask);CHKERRQ(ierr); 1894 1895 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1896 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1897 ierr = MatView_Private(B);CHKERRQ(ierr); 1898 *A = B; 1899 PetscFunctionReturn(0); 1900 } 1901 1902 #undef __FUNCT__ 1903 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1904 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1905 { 1906 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1907 MatScalar *aa=a->a,*v,*v1; 1908 PetscScalar *x,*b,*t,sum,d; 1909 PetscErrorCode ierr; 1910 PetscInt m=a->mbs,bs=A->bs,*ai=a->i,*aj=a->j; 1911 PetscInt nz,nz1,*vj,*vj1,i; 1912 1913 PetscFunctionBegin; 1914 its = its*lits; 1915 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1916 1917 if (bs > 1) 1918 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1919 1920 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1921 if (xx != bb) { 1922 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1923 } else { 1924 b = x; 1925 } 1926 1927 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1928 1929 if (flag & SOR_ZERO_INITIAL_GUESS) { 1930 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1931 for (i=0; i<m; i++) 1932 t[i] = b[i]; 1933 1934 for (i=0; i<m; i++){ 1935 d = *(aa + ai[i]); /* diag[i] */ 1936 v = aa + ai[i] + 1; 1937 vj = aj + ai[i] + 1; 1938 nz = ai[i+1] - ai[i] - 1; 1939 x[i] = omega*t[i]/d; 1940 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 1941 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 1942 } 1943 } 1944 1945 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1946 for (i=0; i<m; i++) 1947 t[i] = b[i]; 1948 1949 for (i=0; i<m-1; i++){ /* update rhs */ 1950 v = aa + ai[i] + 1; 1951 vj = aj + ai[i] + 1; 1952 nz = ai[i+1] - ai[i] - 1; 1953 while (nz--) t[*vj++] -= x[i]*(*v++); 1954 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 1955 } 1956 for (i=m-1; i>=0; i--){ 1957 d = *(aa + ai[i]); 1958 v = aa + ai[i] + 1; 1959 vj = aj + ai[i] + 1; 1960 nz = ai[i+1] - ai[i] - 1; 1961 sum = t[i]; 1962 while (nz--) sum -= x[*vj++]*(*v++); 1963 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 1964 x[i] = (1-omega)*x[i] + omega*sum/d; 1965 } 1966 } 1967 its--; 1968 } 1969 1970 while (its--) { 1971 /* 1972 forward sweep: 1973 for i=0,...,m-1: 1974 sum[i] = (b[i] - U(i,:)x )/d[i]; 1975 x[i] = (1-omega)x[i] + omega*sum[i]; 1976 b = b - x[i]*U^T(i,:); 1977 1978 */ 1979 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1980 for (i=0; i<m; i++) 1981 t[i] = b[i]; 1982 1983 for (i=0; i<m; i++){ 1984 d = *(aa + ai[i]); /* diag[i] */ 1985 v = aa + ai[i] + 1; v1=v; 1986 vj = aj + ai[i] + 1; vj1=vj; 1987 nz = ai[i+1] - ai[i] - 1; nz1=nz; 1988 sum = t[i]; 1989 while (nz1--) sum -= (*v1++)*x[*vj1++]; 1990 x[i] = (1-omega)*x[i] + omega*sum/d; 1991 while (nz--) t[*vj++] -= x[i]*(*v++); 1992 ierr = PetscLogFlops(4*nz-2);CHKERRQ(ierr); 1993 } 1994 } 1995 1996 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1997 /* 1998 backward sweep: 1999 b = b - x[i]*U^T(i,:), i=0,...,n-2 2000 for i=m-1,...,0: 2001 sum[i] = (b[i] - U(i,:)x )/d[i]; 2002 x[i] = (1-omega)x[i] + omega*sum[i]; 2003 */ 2004 for (i=0; i<m; i++) 2005 t[i] = b[i]; 2006 2007 for (i=0; i<m-1; i++){ /* update rhs */ 2008 v = aa + ai[i] + 1; 2009 vj = aj + ai[i] + 1; 2010 nz = ai[i+1] - ai[i] - 1; 2011 while (nz--) t[*vj++] -= x[i]*(*v++); 2012 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2013 } 2014 for (i=m-1; i>=0; i--){ 2015 d = *(aa + ai[i]); 2016 v = aa + ai[i] + 1; 2017 vj = aj + ai[i] + 1; 2018 nz = ai[i+1] - ai[i] - 1; 2019 sum = t[i]; 2020 while (nz--) sum -= x[*vj++]*(*v++); 2021 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2022 x[i] = (1-omega)*x[i] + omega*sum/d; 2023 } 2024 } 2025 } 2026 2027 ierr = PetscFree(t);CHKERRQ(ierr); 2028 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2029 if (bb != xx) { 2030 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2031 } 2032 PetscFunctionReturn(0); 2033 } 2034 2035 2036 2037 2038 2039 2040