1 2 /* 3 Defines the basic matrix operations for the SBAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 7 #include "src/inline/spops.h" 8 #include "src/mat/impls/sbaij/seq/sbaij.h" 9 10 #define CHUNKSIZE 10 11 12 /* 13 Checks for missing diagonals 14 */ 15 #undef __FUNCT__ 16 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 17 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A) 18 { 19 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 20 PetscErrorCode ierr; 21 PetscInt *diag,*jj = a->j,i; 22 23 PetscFunctionBegin; 24 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 25 diag = a->diag; 26 for (i=0; i<a->mbs; i++) { 27 if (jj[diag[i]] != i) SETERRQ1(PETSC_ERR_ARG_CORRUPT,"Matrix is missing diagonal number %D",i); 28 } 29 PetscFunctionReturn(0); 30 } 31 32 #undef __FUNCT__ 33 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 34 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A) 35 { 36 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 37 PetscErrorCode ierr; 38 PetscInt i,mbs = a->mbs; 39 40 PetscFunctionBegin; 41 if (a->diag) PetscFunctionReturn(0); 42 43 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 44 ierr = PetscLogObjectMemory(A,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 45 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 46 PetscFunctionReturn(0); 47 } 48 49 #undef __FUNCT__ 50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 51 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 52 { 53 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 54 PetscInt n = a->mbs,i; 55 56 PetscFunctionBegin; 57 *nn = n; 58 if (!ia) PetscFunctionReturn(0); 59 60 if (oshift == 1) { 61 /* temporarily add 1 to i and j indices */ 62 PetscInt nz = a->i[n]; 63 for (i=0; i<nz; i++) a->j[i]++; 64 for (i=0; i<n+1; i++) a->i[i]++; 65 *ia = a->i; *ja = a->j; 66 } else { 67 *ia = a->i; *ja = a->j; 68 } 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 74 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 75 { 76 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 77 PetscInt i,n = a->mbs; 78 79 PetscFunctionBegin; 80 if (!ia) PetscFunctionReturn(0); 81 82 if (oshift == 1) { 83 PetscInt nz = a->i[n]-1; 84 for (i=0; i<nz; i++) a->j[i]--; 85 for (i=0; i<n+1; i++) a->i[i]--; 86 } 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 92 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A) 93 { 94 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 95 PetscErrorCode ierr; 96 97 PetscFunctionBegin; 98 PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->m,a->nz); 99 ierr = PetscFree(a->a);CHKERRQ(ierr); 100 if (!a->singlemalloc) { 101 ierr = PetscFree(a->i);CHKERRQ(ierr); 102 ierr = PetscFree(a->j);CHKERRQ(ierr); 103 } 104 if (a->row) { 105 ierr = ISDestroy(a->row);CHKERRQ(ierr); 106 } 107 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 108 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 109 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 110 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 111 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 112 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 113 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 114 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 115 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 116 117 if (a->inew){ 118 ierr = PetscFree(a->inew);CHKERRQ(ierr); 119 a->inew = 0; 120 } 121 ierr = PetscFree(a);CHKERRQ(ierr); 122 123 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 124 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 125 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 126 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 127 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr); 128 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 129 PetscFunctionReturn(0); 130 } 131 132 #undef __FUNCT__ 133 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 134 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op) 135 { 136 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 137 138 PetscFunctionBegin; 139 switch (op) { 140 case MAT_ROW_ORIENTED: 141 a->roworiented = PETSC_TRUE; 142 break; 143 case MAT_COLUMN_ORIENTED: 144 a->roworiented = PETSC_FALSE; 145 break; 146 case MAT_COLUMNS_SORTED: 147 a->sorted = PETSC_TRUE; 148 break; 149 case MAT_COLUMNS_UNSORTED: 150 a->sorted = PETSC_FALSE; 151 break; 152 case MAT_KEEP_ZEROED_ROWS: 153 a->keepzeroedrows = PETSC_TRUE; 154 break; 155 case MAT_NO_NEW_NONZERO_LOCATIONS: 156 a->nonew = 1; 157 break; 158 case MAT_NEW_NONZERO_LOCATION_ERR: 159 a->nonew = -1; 160 break; 161 case MAT_NEW_NONZERO_ALLOCATION_ERR: 162 a->nonew = -2; 163 break; 164 case MAT_YES_NEW_NONZERO_LOCATIONS: 165 a->nonew = 0; 166 break; 167 case MAT_ROWS_SORTED: 168 case MAT_ROWS_UNSORTED: 169 case MAT_YES_NEW_DIAGONALS: 170 case MAT_IGNORE_OFF_PROC_ENTRIES: 171 case MAT_USE_HASH_TABLE: 172 PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n"); 173 break; 174 case MAT_NO_NEW_DIAGONALS: 175 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 176 case MAT_NOT_SYMMETRIC: 177 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 178 case MAT_HERMITIAN: 179 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 180 case MAT_SYMMETRIC: 181 case MAT_STRUCTURALLY_SYMMETRIC: 182 case MAT_NOT_HERMITIAN: 183 case MAT_SYMMETRY_ETERNAL: 184 case MAT_NOT_SYMMETRY_ETERNAL: 185 break; 186 default: 187 SETERRQ(PETSC_ERR_SUP,"unknown option"); 188 } 189 PetscFunctionReturn(0); 190 } 191 192 #undef __FUNCT__ 193 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 194 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v) 195 { 196 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 197 PetscErrorCode ierr; 198 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2; 199 MatScalar *aa,*aa_i; 200 PetscScalar *v_i; 201 202 PetscFunctionBegin; 203 bs = A->bs; 204 ai = a->i; 205 aj = a->j; 206 aa = a->a; 207 bs2 = a->bs2; 208 209 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row); 210 211 bn = row/bs; /* Block number */ 212 bp = row % bs; /* Block position */ 213 M = ai[bn+1] - ai[bn]; 214 *ncols = bs*M; 215 216 if (v) { 217 *v = 0; 218 if (*ncols) { 219 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 220 for (i=0; i<M; i++) { /* for each block in the block row */ 221 v_i = *v + i*bs; 222 aa_i = aa + bs2*(ai[bn] + i); 223 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 224 } 225 } 226 } 227 228 if (cols) { 229 *cols = 0; 230 if (*ncols) { 231 ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr); 232 for (i=0; i<M; i++) { /* for each block in the block row */ 233 cols_i = *cols + i*bs; 234 itmp = bs*aj[ai[bn] + i]; 235 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 236 } 237 } 238 } 239 240 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 241 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 242 #ifdef column_search 243 v_i = *v + M*bs; 244 cols_i = *cols + M*bs; 245 for (i=0; i<bn; i++){ /* for each block row */ 246 M = ai[i+1] - ai[i]; 247 for (j=0; j<M; j++){ 248 itmp = aj[ai[i] + j]; /* block column value */ 249 if (itmp == bn){ 250 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 251 for (k=0; k<bs; k++) { 252 *cols_i++ = i*bs+k; 253 *v_i++ = aa_i[k]; 254 } 255 *ncols += bs; 256 break; 257 } 258 } 259 } 260 #endif 261 PetscFunctionReturn(0); 262 } 263 264 #undef __FUNCT__ 265 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 266 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 267 { 268 PetscErrorCode ierr; 269 270 PetscFunctionBegin; 271 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 272 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 273 PetscFunctionReturn(0); 274 } 275 276 #undef __FUNCT__ 277 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 278 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B) 279 { 280 PetscErrorCode ierr; 281 PetscFunctionBegin; 282 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 283 PetscFunctionReturn(0); 284 } 285 286 #undef __FUNCT__ 287 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 288 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 289 { 290 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 291 PetscErrorCode ierr; 292 PetscInt i,j,bs = A->bs,k,l,bs2=a->bs2; 293 char *name; 294 PetscViewerFormat format; 295 296 PetscFunctionBegin; 297 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 298 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 299 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 300 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 301 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 302 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 303 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 304 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 305 for (i=0; i<a->mbs; i++) { 306 for (j=0; j<bs; j++) { 307 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 308 for (k=a->i[i]; k<a->i[i+1]; k++) { 309 for (l=0; l<bs; l++) { 310 #if defined(PETSC_USE_COMPLEX) 311 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 312 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 313 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 314 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 315 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 316 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 317 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 318 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 319 } 320 #else 321 if (a->a[bs2*k + l*bs + j] != 0.0) { 322 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 323 } 324 #endif 325 } 326 } 327 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 328 } 329 } 330 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 331 } else { 332 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 333 for (i=0; i<a->mbs; i++) { 334 for (j=0; j<bs; j++) { 335 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 336 for (k=a->i[i]; k<a->i[i+1]; k++) { 337 for (l=0; l<bs; l++) { 338 #if defined(PETSC_USE_COMPLEX) 339 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 340 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 341 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 342 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 343 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 344 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 345 } else { 346 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 347 } 348 #else 349 ierr = PetscViewerASCIIPrintf(viewer," %D %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 350 #endif 351 } 352 } 353 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 354 } 355 } 356 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 357 } 358 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 359 PetscFunctionReturn(0); 360 } 361 362 #undef __FUNCT__ 363 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 364 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 365 { 366 Mat A = (Mat) Aa; 367 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 368 PetscErrorCode ierr; 369 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2; 370 PetscMPIInt rank; 371 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 372 MatScalar *aa; 373 MPI_Comm comm; 374 PetscViewer viewer; 375 376 PetscFunctionBegin; 377 /* 378 This is nasty. If this is called from an originally parallel matrix 379 then all processes call this,but only the first has the matrix so the 380 rest should return immediately. 381 */ 382 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 383 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 384 if (rank) PetscFunctionReturn(0); 385 386 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 387 388 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 389 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 390 391 /* loop over matrix elements drawing boxes */ 392 color = PETSC_DRAW_BLUE; 393 for (i=0,row=0; i<mbs; i++,row+=bs) { 394 for (j=a->i[i]; j<a->i[i+1]; j++) { 395 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 396 x_l = a->j[j]*bs; x_r = x_l + 1.0; 397 aa = a->a + j*bs2; 398 for (k=0; k<bs; k++) { 399 for (l=0; l<bs; l++) { 400 if (PetscRealPart(*aa++) >= 0.) continue; 401 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 402 } 403 } 404 } 405 } 406 color = PETSC_DRAW_CYAN; 407 for (i=0,row=0; i<mbs; i++,row+=bs) { 408 for (j=a->i[i]; j<a->i[i+1]; j++) { 409 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 410 x_l = a->j[j]*bs; x_r = x_l + 1.0; 411 aa = a->a + j*bs2; 412 for (k=0; k<bs; k++) { 413 for (l=0; l<bs; l++) { 414 if (PetscRealPart(*aa++) != 0.) continue; 415 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 416 } 417 } 418 } 419 } 420 421 color = PETSC_DRAW_RED; 422 for (i=0,row=0; i<mbs; i++,row+=bs) { 423 for (j=a->i[i]; j<a->i[i+1]; j++) { 424 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 425 x_l = a->j[j]*bs; x_r = x_l + 1.0; 426 aa = a->a + j*bs2; 427 for (k=0; k<bs; k++) { 428 for (l=0; l<bs; l++) { 429 if (PetscRealPart(*aa++) <= 0.) continue; 430 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 431 } 432 } 433 } 434 } 435 PetscFunctionReturn(0); 436 } 437 438 #undef __FUNCT__ 439 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 440 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 441 { 442 PetscErrorCode ierr; 443 PetscReal xl,yl,xr,yr,w,h; 444 PetscDraw draw; 445 PetscTruth isnull; 446 447 PetscFunctionBegin; 448 449 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 450 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 451 452 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 453 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 454 xr += w; yr += h; xl = -w; yl = -h; 455 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 456 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 457 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 458 PetscFunctionReturn(0); 459 } 460 461 #undef __FUNCT__ 462 #define __FUNCT__ "MatView_SeqSBAIJ" 463 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 464 { 465 PetscErrorCode ierr; 466 PetscTruth iascii,isdraw; 467 468 PetscFunctionBegin; 469 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 470 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 471 if (iascii){ 472 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 473 } else if (isdraw) { 474 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 475 } else { 476 Mat B; 477 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 478 ierr = MatView(B,viewer);CHKERRQ(ierr); 479 ierr = MatDestroy(B);CHKERRQ(ierr); 480 } 481 PetscFunctionReturn(0); 482 } 483 484 485 #undef __FUNCT__ 486 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 487 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 488 { 489 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 490 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 491 PetscInt *ai = a->i,*ailen = a->ilen; 492 PetscInt brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2; 493 MatScalar *ap,*aa = a->a,zero = 0.0; 494 495 PetscFunctionBegin; 496 for (k=0; k<m; k++) { /* loop over rows */ 497 row = im[k]; brow = row/bs; 498 if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row); 499 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 500 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 501 nrow = ailen[brow]; 502 for (l=0; l<n; l++) { /* loop over columns */ 503 if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]); 504 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1); 505 col = in[l] ; 506 bcol = col/bs; 507 cidx = col%bs; 508 ridx = row%bs; 509 high = nrow; 510 low = 0; /* assume unsorted */ 511 while (high-low > 5) { 512 t = (low+high)/2; 513 if (rp[t] > bcol) high = t; 514 else low = t; 515 } 516 for (i=low; i<high; i++) { 517 if (rp[i] > bcol) break; 518 if (rp[i] == bcol) { 519 *v++ = ap[bs2*i+bs*cidx+ridx]; 520 goto finished; 521 } 522 } 523 *v++ = zero; 524 finished:; 525 } 526 } 527 PetscFunctionReturn(0); 528 } 529 530 531 #undef __FUNCT__ 532 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 533 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 534 { 535 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 536 PetscErrorCode ierr; 537 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 538 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 539 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval; 540 PetscTruth roworiented=a->roworiented; 541 const MatScalar *value = v; 542 MatScalar *ap,*aa = a->a,*bap; 543 544 PetscFunctionBegin; 545 if (roworiented) { 546 stepval = (n-1)*bs; 547 } else { 548 stepval = (m-1)*bs; 549 } 550 for (k=0; k<m; k++) { /* loop over added rows */ 551 row = im[k]; 552 if (row < 0) continue; 553 #if defined(PETSC_USE_DEBUG) 554 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 555 #endif 556 rp = aj + ai[row]; 557 ap = aa + bs2*ai[row]; 558 rmax = imax[row]; 559 nrow = ailen[row]; 560 low = 0; 561 high = nrow; 562 for (l=0; l<n; l++) { /* loop over added columns */ 563 if (in[l] < 0) continue; 564 col = in[l]; 565 #if defined(PETSC_USE_DEBUG) 566 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1); 567 #endif 568 if (col < row) continue; /* ignore lower triangular block */ 569 if (roworiented) { 570 value = v + k*(stepval+bs)*bs + l*bs; 571 } else { 572 value = v + l*(stepval+bs)*bs + k*bs; 573 } 574 if (col < lastcol) low = 0; else high = nrow; 575 lastcol = col; 576 while (high-low > 7) { 577 t = (low+high)/2; 578 if (rp[t] > col) high = t; 579 else low = t; 580 } 581 for (i=low; i<high; i++) { 582 if (rp[i] > col) break; 583 if (rp[i] == col) { 584 bap = ap + bs2*i; 585 if (roworiented) { 586 if (is == ADD_VALUES) { 587 for (ii=0; ii<bs; ii++,value+=stepval) { 588 for (jj=ii; jj<bs2; jj+=bs) { 589 bap[jj] += *value++; 590 } 591 } 592 } else { 593 for (ii=0; ii<bs; ii++,value+=stepval) { 594 for (jj=ii; jj<bs2; jj+=bs) { 595 bap[jj] = *value++; 596 } 597 } 598 } 599 } else { 600 if (is == ADD_VALUES) { 601 for (ii=0; ii<bs; ii++,value+=stepval) { 602 for (jj=0; jj<bs; jj++) { 603 *bap++ += *value++; 604 } 605 } 606 } else { 607 for (ii=0; ii<bs; ii++,value+=stepval) { 608 for (jj=0; jj<bs; jj++) { 609 *bap++ = *value++; 610 } 611 } 612 } 613 } 614 goto noinsert2; 615 } 616 } 617 if (nonew == 1) goto noinsert2; 618 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 619 if (nrow >= rmax) { 620 /* there is no extra room in row, therefore enlarge */ 621 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 622 MatScalar *new_a; 623 624 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 625 626 /* malloc new storage space */ 627 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); 628 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 629 new_j = (PetscInt*)(new_a + bs2*new_nz); 630 new_i = new_j + new_nz; 631 632 /* copy over old data into new slots */ 633 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 634 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 635 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 636 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 637 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 638 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 639 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 640 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 641 /* free up old matrix storage */ 642 ierr = PetscFree(a->a);CHKERRQ(ierr); 643 if (!a->singlemalloc) { 644 ierr = PetscFree(a->i);CHKERRQ(ierr); 645 ierr = PetscFree(a->j);CHKERRQ(ierr); 646 } 647 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 648 a->singlemalloc = PETSC_TRUE; 649 650 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 651 rmax = imax[row] = imax[row] + CHUNKSIZE; 652 ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); 653 a->maxnz += bs2*CHUNKSIZE; 654 a->reallocs++; 655 a->nz++; 656 } 657 N = nrow++ - 1; 658 /* shift up all the later entries in this row */ 659 for (ii=N; ii>=i; ii--) { 660 rp[ii+1] = rp[ii]; 661 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 662 } 663 if (N >= i) { 664 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 665 } 666 rp[i] = col; 667 bap = ap + bs2*i; 668 if (roworiented) { 669 for (ii=0; ii<bs; ii++,value+=stepval) { 670 for (jj=ii; jj<bs2; jj+=bs) { 671 bap[jj] = *value++; 672 } 673 } 674 } else { 675 for (ii=0; ii<bs; ii++,value+=stepval) { 676 for (jj=0; jj<bs; jj++) { 677 *bap++ = *value++; 678 } 679 } 680 } 681 noinsert2:; 682 low = i; 683 } 684 ailen[row] = nrow; 685 } 686 PetscFunctionReturn(0); 687 } 688 689 #undef __FUNCT__ 690 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 691 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 692 { 693 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 694 PetscErrorCode ierr; 695 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 696 PetscInt m = A->m,*ip,N,*ailen = a->ilen; 697 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 698 MatScalar *aa = a->a,*ap; 699 700 PetscFunctionBegin; 701 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 702 703 if (m) rmax = ailen[0]; 704 for (i=1; i<mbs; i++) { 705 /* move each row back by the amount of empty slots (fshift) before it*/ 706 fshift += imax[i-1] - ailen[i-1]; 707 rmax = PetscMax(rmax,ailen[i]); 708 if (fshift) { 709 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 710 N = ailen[i]; 711 for (j=0; j<N; j++) { 712 ip[j-fshift] = ip[j]; 713 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 714 } 715 } 716 ai[i] = ai[i-1] + ailen[i-1]; 717 } 718 if (mbs) { 719 fshift += imax[mbs-1] - ailen[mbs-1]; 720 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 721 } 722 /* reset ilen and imax for each row */ 723 for (i=0; i<mbs; i++) { 724 ailen[i] = imax[i] = ai[i+1] - ai[i]; 725 } 726 a->nz = ai[mbs]; 727 728 /* diagonals may have moved, reset it */ 729 if (a->diag) { 730 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 731 } 732 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n", 733 m,A->m,A->bs,fshift*bs2,a->nz*bs2); 734 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %D\n", 735 a->reallocs); 736 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %D\n",rmax); 737 a->reallocs = 0; 738 A->info.nz_unneeded = (PetscReal)fshift*bs2; 739 PetscFunctionReturn(0); 740 } 741 742 /* 743 This function returns an array of flags which indicate the locations of contiguous 744 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 745 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 746 Assume: sizes should be long enough to hold all the values. 747 */ 748 #undef __FUNCT__ 749 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 750 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 751 { 752 PetscInt i,j,k,row; 753 PetscTruth flg; 754 755 PetscFunctionBegin; 756 for (i=0,j=0; i<n; j++) { 757 row = idx[i]; 758 if (row%bs!=0) { /* Not the begining of a block */ 759 sizes[j] = 1; 760 i++; 761 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 762 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 763 i++; 764 } else { /* Begining of the block, so check if the complete block exists */ 765 flg = PETSC_TRUE; 766 for (k=1; k<bs; k++) { 767 if (row+k != idx[i+k]) { /* break in the block */ 768 flg = PETSC_FALSE; 769 break; 770 } 771 } 772 if (flg) { /* No break in the bs */ 773 sizes[j] = bs; 774 i+= bs; 775 } else { 776 sizes[j] = 1; 777 i++; 778 } 779 } 780 } 781 *bs_max = j; 782 PetscFunctionReturn(0); 783 } 784 785 786 /* Only add/insert a(i,j) with i<=j (blocks). 787 Any a(i,j) with i>j input by user is ingored. 788 */ 789 790 #undef __FUNCT__ 791 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 792 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 793 { 794 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 795 PetscErrorCode ierr; 796 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 797 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 798 PetscInt *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol; 799 PetscInt ridx,cidx,bs2=a->bs2; 800 MatScalar *ap,value,*aa=a->a,*bap; 801 802 PetscFunctionBegin; 803 for (k=0; k<m; k++) { /* loop over added rows */ 804 row = im[k]; /* row number */ 805 brow = row/bs; /* block row number */ 806 if (row < 0) continue; 807 #if defined(PETSC_USE_DEBUG) 808 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 809 #endif 810 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 811 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 812 rmax = imax[brow]; /* maximum space allocated for this row */ 813 nrow = ailen[brow]; /* actual length of this row */ 814 low = 0; 815 816 for (l=0; l<n; l++) { /* loop over added columns */ 817 if (in[l] < 0) continue; 818 #if defined(PETSC_USE_DEBUG) 819 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->m-1); 820 #endif 821 col = in[l]; 822 bcol = col/bs; /* block col number */ 823 824 if (brow > bcol) continue; /* ignore lower triangular values */ 825 826 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 827 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 828 /* element value a(k,l) */ 829 if (roworiented) { 830 value = v[l + k*n]; 831 } else { 832 value = v[k + l*m]; 833 } 834 835 /* move pointer bap to a(k,l) quickly and add/insert value */ 836 if (col < lastcol) low = 0; high = nrow; 837 lastcol = col; 838 while (high-low > 7) { 839 t = (low+high)/2; 840 if (rp[t] > bcol) high = t; 841 else low = t; 842 } 843 for (i=low; i<high; i++) { 844 if (rp[i] > bcol) break; 845 if (rp[i] == bcol) { 846 bap = ap + bs2*i + bs*cidx + ridx; 847 if (is == ADD_VALUES) *bap += value; 848 else *bap = value; 849 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 850 if (brow == bcol && ridx < cidx){ 851 bap = ap + bs2*i + bs*ridx + cidx; 852 if (is == ADD_VALUES) *bap += value; 853 else *bap = value; 854 } 855 goto noinsert1; 856 } 857 } 858 859 if (nonew == 1) goto noinsert1; 860 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 861 if (nrow >= rmax) { 862 /* there is no extra room in row, therefore enlarge */ 863 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 864 MatScalar *new_a; 865 866 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 867 868 /* Malloc new storage space */ 869 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); 870 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 871 new_j = (PetscInt*)(new_a + bs2*new_nz); 872 new_i = new_j + new_nz; 873 874 /* copy over old data into new slots */ 875 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 876 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 877 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 878 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 879 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 880 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 881 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 882 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 883 /* free up old matrix storage */ 884 ierr = PetscFree(a->a);CHKERRQ(ierr); 885 if (!a->singlemalloc) { 886 ierr = PetscFree(a->i);CHKERRQ(ierr); 887 ierr = PetscFree(a->j);CHKERRQ(ierr); 888 } 889 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 890 a->singlemalloc = PETSC_TRUE; 891 892 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 893 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 894 ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); 895 a->maxnz += bs2*CHUNKSIZE; 896 a->reallocs++; 897 a->nz++; 898 } 899 900 N = nrow++ - 1; 901 /* shift up all the later entries in this row */ 902 for (ii=N; ii>=i; ii--) { 903 rp[ii+1] = rp[ii]; 904 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 905 } 906 if (N>=i) { 907 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 908 } 909 rp[i] = bcol; 910 ap[bs2*i + bs*cidx + ridx] = value; 911 noinsert1:; 912 low = i; 913 } 914 } /* end of loop over added columns */ 915 ailen[brow] = nrow; 916 } /* end of loop over added rows */ 917 PetscFunctionReturn(0); 918 } 919 920 EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*); 921 EXTERN PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*); 922 EXTERN PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat,PetscInt,IS[],PetscInt); 923 EXTERN PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,PetscInt,MatReuse,Mat*); 924 EXTERN PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat*[]); 925 EXTERN PetscErrorCode MatScale_SeqSBAIJ(const PetscScalar*,Mat); 926 EXTERN PetscErrorCode MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 927 EXTERN PetscErrorCode MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 928 EXTERN PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat,Vec); 929 EXTERN PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 930 EXTERN PetscErrorCode MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 931 EXTERN PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat); 932 EXTERN PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat,Vec); 933 934 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 935 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 936 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 937 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 938 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 939 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 940 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 941 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 942 943 EXTERN PetscErrorCode MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs); 944 945 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 946 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 947 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 948 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 949 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 950 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 951 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 952 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 953 954 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,MatFactorInfo*,Mat*); 955 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,MatFactorInfo*,Mat*); 956 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,MatFactorInfo*,Mat*); 957 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,MatFactorInfo*,Mat*); 958 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,MatFactorInfo*,Mat*); 959 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,MatFactorInfo*,Mat*); 960 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,MatFactorInfo*,Mat*); 961 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,MatFactorInfo*,Mat*); 962 EXTERN PetscErrorCode MatGetInertia_SeqSBAIJ(Mat,PetscInt*,PetscInt*,PetscInt*); 963 964 EXTERN PetscErrorCode MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 965 EXTERN PetscErrorCode MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 966 EXTERN PetscErrorCode MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 967 EXTERN PetscErrorCode MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 968 EXTERN PetscErrorCode MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 969 EXTERN PetscErrorCode MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 970 EXTERN PetscErrorCode MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 971 EXTERN PetscErrorCode MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 972 973 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 974 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 975 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 976 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 977 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 978 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 979 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 980 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 981 982 #ifdef HAVE_MatICCFactor 983 /* modified from MatILUFactor_SeqSBAIJ, needs further work! */ 984 #undef __FUNCT__ 985 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 986 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 987 { 988 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 989 Mat outA; 990 PetscErrorCode ierr; 991 PetscTruth row_identity,col_identity; 992 993 PetscFunctionBegin; 994 outA = inA; 995 inA->factor = FACTOR_CHOLESKY; 996 997 if (!a->diag) { 998 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 999 } 1000 /* 1001 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1002 for ILU(0) factorization with natural ordering 1003 */ 1004 switch (a->bs) { 1005 case 1: 1006 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1007 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1008 inA->ops->solves = MatSolves_SeqSBAIJ_1; 1009 PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1010 case 2: 1011 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1012 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1013 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1014 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1015 break; 1016 case 3: 1017 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1018 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1019 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1020 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1021 break; 1022 case 4: 1023 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1024 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1025 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1026 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1027 break; 1028 case 5: 1029 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1030 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1031 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1032 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1033 break; 1034 case 6: 1035 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1036 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1037 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1038 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1039 break; 1040 case 7: 1041 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1042 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1043 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1044 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1045 break; 1046 default: 1047 a->row = row; 1048 a->icol = col; 1049 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1050 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1051 1052 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1053 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1054 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 1055 1056 if (!a->solve_work) { 1057 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1058 ierr = PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1059 } 1060 } 1061 1062 ierr = MatCholeskyFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1063 PetscFunctionReturn(0); 1064 } 1065 #endif 1066 1067 #undef __FUNCT__ 1068 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1069 PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat A) 1070 { 1071 static PetscTruth called = PETSC_FALSE; 1072 MPI_Comm comm = A->comm; 1073 PetscErrorCode ierr; 1074 1075 PetscFunctionBegin; 1076 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1077 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1078 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1079 PetscFunctionReturn(0); 1080 } 1081 1082 EXTERN_C_BEGIN 1083 #undef __FUNCT__ 1084 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1085 PetscErrorCode MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices) 1086 { 1087 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1088 PetscInt i,nz,n; 1089 1090 PetscFunctionBegin; 1091 nz = baij->maxnz; 1092 n = mat->n; 1093 for (i=0; i<nz; i++) { 1094 baij->j[i] = indices[i]; 1095 } 1096 baij->nz = nz; 1097 for (i=0; i<n; i++) { 1098 baij->ilen[i] = baij->imax[i]; 1099 } 1100 PetscFunctionReturn(0); 1101 } 1102 EXTERN_C_END 1103 1104 #undef __FUNCT__ 1105 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1106 /*@ 1107 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1108 in the matrix. 1109 1110 Input Parameters: 1111 + mat - the SeqSBAIJ matrix 1112 - indices - the column indices 1113 1114 Level: advanced 1115 1116 Notes: 1117 This can be called if you have precomputed the nonzero structure of the 1118 matrix and want to provide it to the matrix object to improve the performance 1119 of the MatSetValues() operation. 1120 1121 You MUST have set the correct numbers of nonzeros per row in the call to 1122 MatCreateSeqSBAIJ(), and the columns indices MUST be sorted. 1123 1124 MUST be called before any calls to MatSetValues() 1125 1126 .seealso: MatCreateSeqSBAIJ 1127 @*/ 1128 PetscErrorCode MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1129 { 1130 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1131 1132 PetscFunctionBegin; 1133 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1134 PetscValidPointer(indices,2); 1135 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1136 if (f) { 1137 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1138 } else { 1139 SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices"); 1140 } 1141 PetscFunctionReturn(0); 1142 } 1143 1144 #undef __FUNCT__ 1145 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1146 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A) 1147 { 1148 PetscErrorCode ierr; 1149 1150 PetscFunctionBegin; 1151 ierr = MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1152 PetscFunctionReturn(0); 1153 } 1154 1155 #undef __FUNCT__ 1156 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1157 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1158 { 1159 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1160 PetscFunctionBegin; 1161 *array = a->a; 1162 PetscFunctionReturn(0); 1163 } 1164 1165 #undef __FUNCT__ 1166 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1167 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1168 { 1169 PetscFunctionBegin; 1170 PetscFunctionReturn(0); 1171 } 1172 1173 #include "petscblaslapack.h" 1174 #undef __FUNCT__ 1175 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1176 PetscErrorCode MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1177 { 1178 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1179 PetscErrorCode ierr; 1180 PetscInt i,bs=Y->bs,bs2,j; 1181 PetscBLASInt bnz = (PetscBLASInt)x->nz,one = 1; 1182 1183 PetscFunctionBegin; 1184 if (str == SAME_NONZERO_PATTERN) { 1185 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1186 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1187 if (y->xtoy && y->XtoY != X) { 1188 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1189 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1190 } 1191 if (!y->xtoy) { /* get xtoy */ 1192 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1193 y->XtoY = X; 1194 } 1195 bs2 = bs*bs; 1196 for (i=0; i<x->nz; i++) { 1197 j = 0; 1198 while (j < bs2){ 1199 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1200 j++; 1201 } 1202 } 1203 PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)); 1204 } else { 1205 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1206 } 1207 PetscFunctionReturn(0); 1208 } 1209 1210 #undef __FUNCT__ 1211 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1212 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1213 { 1214 PetscFunctionBegin; 1215 *flg = PETSC_TRUE; 1216 PetscFunctionReturn(0); 1217 } 1218 1219 #undef __FUNCT__ 1220 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1221 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1222 { 1223 PetscFunctionBegin; 1224 *flg = PETSC_TRUE; 1225 PetscFunctionReturn(0); 1226 } 1227 1228 #undef __FUNCT__ 1229 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1230 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg) 1231 { 1232 PetscFunctionBegin; 1233 *flg = PETSC_FALSE; 1234 PetscFunctionReturn(0); 1235 } 1236 1237 /* -------------------------------------------------------------------*/ 1238 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1239 MatGetRow_SeqSBAIJ, 1240 MatRestoreRow_SeqSBAIJ, 1241 MatMult_SeqSBAIJ_N, 1242 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1243 MatMult_SeqSBAIJ_N, 1244 MatMultAdd_SeqSBAIJ_N, 1245 MatSolve_SeqSBAIJ_N, 1246 0, 1247 0, 1248 /*10*/ 0, 1249 0, 1250 MatCholeskyFactor_SeqSBAIJ, 1251 MatRelax_SeqSBAIJ, 1252 MatTranspose_SeqSBAIJ, 1253 /*15*/ MatGetInfo_SeqSBAIJ, 1254 MatEqual_SeqSBAIJ, 1255 MatGetDiagonal_SeqSBAIJ, 1256 MatDiagonalScale_SeqSBAIJ, 1257 MatNorm_SeqSBAIJ, 1258 /*20*/ 0, 1259 MatAssemblyEnd_SeqSBAIJ, 1260 0, 1261 MatSetOption_SeqSBAIJ, 1262 MatZeroEntries_SeqSBAIJ, 1263 /*25*/ 0, 1264 0, 1265 0, 1266 MatCholeskyFactorSymbolic_SeqSBAIJ, 1267 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1268 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1269 0, 1270 MatICCFactorSymbolic_SeqSBAIJ, 1271 MatGetArray_SeqSBAIJ, 1272 MatRestoreArray_SeqSBAIJ, 1273 /*35*/ MatDuplicate_SeqSBAIJ, 1274 0, 1275 0, 1276 0, 1277 0, 1278 /*40*/ MatAXPY_SeqSBAIJ, 1279 MatGetSubMatrices_SeqSBAIJ, 1280 MatIncreaseOverlap_SeqSBAIJ, 1281 MatGetValues_SeqSBAIJ, 1282 0, 1283 /*45*/ MatPrintHelp_SeqSBAIJ, 1284 MatScale_SeqSBAIJ, 1285 0, 1286 0, 1287 0, 1288 /*50*/ 0, 1289 MatGetRowIJ_SeqSBAIJ, 1290 MatRestoreRowIJ_SeqSBAIJ, 1291 0, 1292 0, 1293 /*55*/ 0, 1294 0, 1295 0, 1296 0, 1297 MatSetValuesBlocked_SeqSBAIJ, 1298 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1299 0, 1300 0, 1301 MatGetPetscMaps_Petsc, 1302 0, 1303 /*65*/ 0, 1304 0, 1305 0, 1306 0, 1307 0, 1308 /*70*/ MatGetRowMax_SeqSBAIJ, 1309 0, 1310 0, 1311 0, 1312 0, 1313 /*75*/ 0, 1314 0, 1315 0, 1316 0, 1317 0, 1318 /*80*/ 0, 1319 0, 1320 0, 1321 #if !defined(PETSC_USE_COMPLEX) 1322 MatGetInertia_SeqSBAIJ, 1323 #else 1324 0, 1325 #endif 1326 MatLoad_SeqSBAIJ, 1327 /*85*/ MatIsSymmetric_SeqSBAIJ, 1328 MatIsHermitian_SeqSBAIJ, 1329 MatIsStructurallySymmetric_SeqSBAIJ, 1330 0, 1331 0, 1332 /*90*/ 0, 1333 0, 1334 0, 1335 0, 1336 0, 1337 /*95*/ 0, 1338 0, 1339 0, 1340 0}; 1341 1342 EXTERN_C_BEGIN 1343 #undef __FUNCT__ 1344 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1345 PetscErrorCode MatStoreValues_SeqSBAIJ(Mat mat) 1346 { 1347 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1348 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 1349 PetscErrorCode ierr; 1350 1351 PetscFunctionBegin; 1352 if (aij->nonew != 1) { 1353 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1354 } 1355 1356 /* allocate space for values if not already there */ 1357 if (!aij->saved_values) { 1358 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1359 } 1360 1361 /* copy values over */ 1362 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1363 PetscFunctionReturn(0); 1364 } 1365 EXTERN_C_END 1366 1367 EXTERN_C_BEGIN 1368 #undef __FUNCT__ 1369 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1370 PetscErrorCode MatRetrieveValues_SeqSBAIJ(Mat mat) 1371 { 1372 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1373 PetscErrorCode ierr; 1374 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 1375 1376 PetscFunctionBegin; 1377 if (aij->nonew != 1) { 1378 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1379 } 1380 if (!aij->saved_values) { 1381 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1382 } 1383 1384 /* copy values over */ 1385 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1386 PetscFunctionReturn(0); 1387 } 1388 EXTERN_C_END 1389 1390 EXTERN_C_BEGIN 1391 #undef __FUNCT__ 1392 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1393 PetscErrorCode MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 1394 { 1395 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1396 PetscErrorCode ierr; 1397 PetscInt i,len,mbs,bs2; 1398 PetscTruth flg; 1399 1400 PetscFunctionBegin; 1401 B->preallocated = PETSC_TRUE; 1402 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1403 mbs = B->m/bs; 1404 bs2 = bs*bs; 1405 1406 if (mbs*bs != B->m) { 1407 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1408 } 1409 1410 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1411 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 1412 if (nnz) { 1413 for (i=0; i<mbs; i++) { 1414 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 1415 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs); 1416 } 1417 } 1418 1419 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1420 if (!flg) { 1421 switch (bs) { 1422 case 1: 1423 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1424 B->ops->solve = MatSolve_SeqSBAIJ_1; 1425 B->ops->solves = MatSolves_SeqSBAIJ_1; 1426 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1427 B->ops->mult = MatMult_SeqSBAIJ_1; 1428 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1429 break; 1430 case 2: 1431 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1432 B->ops->solve = MatSolve_SeqSBAIJ_2; 1433 B->ops->solvetranspose = MatSolve_SeqSBAIJ_2; 1434 B->ops->mult = MatMult_SeqSBAIJ_2; 1435 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1436 break; 1437 case 3: 1438 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1439 B->ops->solve = MatSolve_SeqSBAIJ_3; 1440 B->ops->solvetranspose = MatSolve_SeqSBAIJ_3; 1441 B->ops->mult = MatMult_SeqSBAIJ_3; 1442 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1443 break; 1444 case 4: 1445 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1446 B->ops->solve = MatSolve_SeqSBAIJ_4; 1447 B->ops->solvetranspose = MatSolve_SeqSBAIJ_4; 1448 B->ops->mult = MatMult_SeqSBAIJ_4; 1449 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1450 break; 1451 case 5: 1452 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1453 B->ops->solve = MatSolve_SeqSBAIJ_5; 1454 B->ops->solvetranspose = MatSolve_SeqSBAIJ_5; 1455 B->ops->mult = MatMult_SeqSBAIJ_5; 1456 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1457 break; 1458 case 6: 1459 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1460 B->ops->solve = MatSolve_SeqSBAIJ_6; 1461 B->ops->solvetranspose = MatSolve_SeqSBAIJ_6; 1462 B->ops->mult = MatMult_SeqSBAIJ_6; 1463 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1464 break; 1465 case 7: 1466 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1467 B->ops->solve = MatSolve_SeqSBAIJ_7; 1468 B->ops->solvetranspose = MatSolve_SeqSBAIJ_7; 1469 B->ops->mult = MatMult_SeqSBAIJ_7; 1470 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1471 break; 1472 } 1473 } 1474 1475 b->mbs = mbs; 1476 b->nbs = mbs; 1477 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->imax);CHKERRQ(ierr); 1478 if (!nnz) { 1479 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1480 else if (nz <= 0) nz = 1; 1481 for (i=0; i<mbs; i++) { 1482 b->imax[i] = nz; 1483 } 1484 nz = nz*mbs; /* total nz */ 1485 } else { 1486 nz = 0; 1487 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1488 } 1489 /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1490 1491 /* allocate the matrix space */ 1492 len = nz*sizeof(PetscInt) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(PetscInt); 1493 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1494 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1495 b->j = (PetscInt*)(b->a + nz*bs2); 1496 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1497 b->i = b->j + nz; 1498 b->singlemalloc = PETSC_TRUE; 1499 1500 /* pointer to beginning of each row */ 1501 b->i[0] = 0; 1502 for (i=1; i<mbs+1; i++) { 1503 b->i[i] = b->i[i-1] + b->imax[i-1]; 1504 } 1505 1506 /* b->ilen will count nonzeros in each block row so far. */ 1507 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->ilen);CHKERRQ(ierr); 1508 ierr = PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1509 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1510 1511 B->bs = bs; 1512 b->bs2 = bs2; 1513 b->nz = 0; 1514 b->maxnz = nz*bs2; 1515 1516 b->inew = 0; 1517 b->jnew = 0; 1518 b->anew = 0; 1519 b->a2anew = 0; 1520 b->permute = PETSC_FALSE; 1521 PetscFunctionReturn(0); 1522 } 1523 EXTERN_C_END 1524 1525 EXTERN_C_BEGIN 1526 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*); 1527 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqBAIJ(Mat,const MatType,MatReuse,Mat*); 1528 EXTERN_C_END 1529 1530 /*MC 1531 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1532 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1533 1534 Options Database Keys: 1535 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1536 1537 Level: beginner 1538 1539 .seealso: MatCreateSeqSBAIJ 1540 M*/ 1541 1542 EXTERN_C_BEGIN 1543 #undef __FUNCT__ 1544 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1545 PetscErrorCode MatCreate_SeqSBAIJ(Mat B) 1546 { 1547 Mat_SeqSBAIJ *b; 1548 PetscErrorCode ierr; 1549 PetscMPIInt size; 1550 1551 PetscFunctionBegin; 1552 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1553 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1554 B->m = B->M = PetscMax(B->m,B->M); 1555 B->n = B->N = PetscMax(B->n,B->N); 1556 1557 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1558 B->data = (void*)b; 1559 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1560 B->ops->destroy = MatDestroy_SeqSBAIJ; 1561 B->ops->view = MatView_SeqSBAIJ; 1562 B->factor = 0; 1563 B->lupivotthreshold = 1.0; 1564 B->mapping = 0; 1565 b->row = 0; 1566 b->icol = 0; 1567 b->reallocs = 0; 1568 b->saved_values = 0; 1569 1570 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1571 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1572 1573 b->sorted = PETSC_FALSE; 1574 b->roworiented = PETSC_TRUE; 1575 b->nonew = 0; 1576 b->diag = 0; 1577 b->solve_work = 0; 1578 b->mult_work = 0; 1579 B->spptr = 0; 1580 b->keepzeroedrows = PETSC_FALSE; 1581 b->xtoy = 0; 1582 b->XtoY = 0; 1583 1584 b->inew = 0; 1585 b->jnew = 0; 1586 b->anew = 0; 1587 b->a2anew = 0; 1588 b->permute = PETSC_FALSE; 1589 1590 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1591 "MatStoreValues_SeqSBAIJ", 1592 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1593 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1594 "MatRetrieveValues_SeqSBAIJ", 1595 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1596 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1597 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1598 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1599 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C", 1600 "MatConvert_SeqSBAIJ_SeqAIJ", 1601 MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr); 1602 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C", 1603 "MatConvert_SeqSBAIJ_SeqBAIJ", 1604 MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr); 1605 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1606 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1607 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1608 1609 B->symmetric = PETSC_TRUE; 1610 B->structurally_symmetric = PETSC_TRUE; 1611 B->symmetric_set = PETSC_TRUE; 1612 B->structurally_symmetric_set = PETSC_TRUE; 1613 PetscFunctionReturn(0); 1614 } 1615 EXTERN_C_END 1616 1617 #undef __FUNCT__ 1618 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1619 /*@C 1620 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1621 compressed row) format. For good matrix assembly performance the 1622 user should preallocate the matrix storage by setting the parameter nz 1623 (or the array nnz). By setting these parameters accurately, performance 1624 during matrix assembly can be increased by more than a factor of 50. 1625 1626 Collective on Mat 1627 1628 Input Parameters: 1629 + A - the symmetric matrix 1630 . bs - size of block 1631 . nz - number of block nonzeros per block row (same for all rows) 1632 - nnz - array containing the number of block nonzeros in the upper triangular plus 1633 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1634 1635 Options Database Keys: 1636 . -mat_no_unroll - uses code that does not unroll the loops in the 1637 block calculations (much slower) 1638 . -mat_block_size - size of the blocks to use 1639 1640 Level: intermediate 1641 1642 Notes: 1643 Specify the preallocated storage with either nz or nnz (not both). 1644 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1645 allocation. For additional details, see the users manual chapter on 1646 matrices. 1647 1648 If the nnz parameter is given then the nz parameter is ignored 1649 1650 1651 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1652 @*/ 1653 PetscErrorCode MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 1654 { 1655 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 1656 1657 PetscFunctionBegin; 1658 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1659 if (f) { 1660 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1661 } 1662 PetscFunctionReturn(0); 1663 } 1664 1665 #undef __FUNCT__ 1666 #define __FUNCT__ "MatCreateSeqSBAIJ" 1667 /*@C 1668 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1669 compressed row) format. For good matrix assembly performance the 1670 user should preallocate the matrix storage by setting the parameter nz 1671 (or the array nnz). By setting these parameters accurately, performance 1672 during matrix assembly can be increased by more than a factor of 50. 1673 1674 Collective on MPI_Comm 1675 1676 Input Parameters: 1677 + comm - MPI communicator, set to PETSC_COMM_SELF 1678 . bs - size of block 1679 . m - number of rows, or number of columns 1680 . nz - number of block nonzeros per block row (same for all rows) 1681 - nnz - array containing the number of block nonzeros in the upper triangular plus 1682 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1683 1684 Output Parameter: 1685 . A - the symmetric matrix 1686 1687 Options Database Keys: 1688 . -mat_no_unroll - uses code that does not unroll the loops in the 1689 block calculations (much slower) 1690 . -mat_block_size - size of the blocks to use 1691 1692 Level: intermediate 1693 1694 Notes: 1695 1696 Specify the preallocated storage with either nz or nnz (not both). 1697 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1698 allocation. For additional details, see the users manual chapter on 1699 matrices. 1700 1701 If the nnz parameter is given then the nz parameter is ignored 1702 1703 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1704 @*/ 1705 PetscErrorCode MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1706 { 1707 PetscErrorCode ierr; 1708 1709 PetscFunctionBegin; 1710 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1711 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1712 ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1713 PetscFunctionReturn(0); 1714 } 1715 1716 #undef __FUNCT__ 1717 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1718 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1719 { 1720 Mat C; 1721 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1722 PetscErrorCode ierr; 1723 PetscInt i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1724 1725 PetscFunctionBegin; 1726 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1727 1728 *B = 0; 1729 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1730 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1731 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1732 c = (Mat_SeqSBAIJ*)C->data; 1733 1734 C->preallocated = PETSC_TRUE; 1735 C->factor = A->factor; 1736 c->row = 0; 1737 c->icol = 0; 1738 c->saved_values = 0; 1739 c->keepzeroedrows = a->keepzeroedrows; 1740 C->assembled = PETSC_TRUE; 1741 1742 C->M = A->M; 1743 C->N = A->N; 1744 C->bs = A->bs; 1745 c->bs2 = a->bs2; 1746 c->mbs = a->mbs; 1747 c->nbs = a->nbs; 1748 1749 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr); 1750 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr); 1751 for (i=0; i<mbs; i++) { 1752 c->imax[i] = a->imax[i]; 1753 c->ilen[i] = a->ilen[i]; 1754 } 1755 1756 /* allocate the matrix space */ 1757 c->singlemalloc = PETSC_TRUE; 1758 len = (mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)); 1759 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1760 c->j = (PetscInt*)(c->a + nz*bs2); 1761 c->i = c->j + nz; 1762 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1763 if (mbs > 0) { 1764 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1765 if (cpvalues == MAT_COPY_VALUES) { 1766 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1767 } else { 1768 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1769 } 1770 } 1771 1772 ierr = PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1773 c->sorted = a->sorted; 1774 c->roworiented = a->roworiented; 1775 c->nonew = a->nonew; 1776 1777 if (a->diag) { 1778 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 1779 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1780 for (i=0; i<mbs; i++) { 1781 c->diag[i] = a->diag[i]; 1782 } 1783 } else c->diag = 0; 1784 c->nz = a->nz; 1785 c->maxnz = a->maxnz; 1786 c->solve_work = 0; 1787 c->mult_work = 0; 1788 *B = C; 1789 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1790 PetscFunctionReturn(0); 1791 } 1792 1793 #undef __FUNCT__ 1794 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1795 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer,const MatType type,Mat *A) 1796 { 1797 Mat_SeqSBAIJ *a; 1798 Mat B; 1799 PetscErrorCode ierr; 1800 int fd; 1801 PetscMPIInt size; 1802 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 1803 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1804 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 1805 PetscInt *masked,nmask,tmp,bs2,ishift; 1806 PetscScalar *aa; 1807 MPI_Comm comm = ((PetscObject)viewer)->comm; 1808 1809 PetscFunctionBegin; 1810 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1811 bs2 = bs*bs; 1812 1813 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1814 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1815 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1816 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1817 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1818 M = header[1]; N = header[2]; nz = header[3]; 1819 1820 if (header[3] < 0) { 1821 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1822 } 1823 1824 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1825 1826 /* 1827 This code adds extra rows to make sure the number of rows is 1828 divisible by the blocksize 1829 */ 1830 mbs = M/bs; 1831 extra_rows = bs - M + bs*(mbs); 1832 if (extra_rows == bs) extra_rows = 0; 1833 else mbs++; 1834 if (extra_rows) { 1835 PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"); 1836 } 1837 1838 /* read in row lengths */ 1839 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1840 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1841 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1842 1843 /* read in column indices */ 1844 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1845 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1846 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1847 1848 /* loop over row lengths determining block row lengths */ 1849 ierr = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr); 1850 ierr = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1851 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 1852 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1853 masked = mask + mbs; 1854 rowcount = 0; nzcount = 0; 1855 for (i=0; i<mbs; i++) { 1856 nmask = 0; 1857 for (j=0; j<bs; j++) { 1858 kmax = rowlengths[rowcount]; 1859 for (k=0; k<kmax; k++) { 1860 tmp = jj[nzcount++]/bs; /* block col. index */ 1861 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1862 } 1863 rowcount++; 1864 } 1865 s_browlengths[i] += nmask; 1866 1867 /* zero out the mask elements we set */ 1868 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1869 } 1870 1871 /* create our matrix */ 1872 ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr); 1873 ierr = MatSetType(B,type);CHKERRQ(ierr); 1874 ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr); 1875 a = (Mat_SeqSBAIJ*)B->data; 1876 1877 /* set matrix "i" values */ 1878 a->i[0] = 0; 1879 for (i=1; i<= mbs; i++) { 1880 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1881 a->ilen[i-1] = s_browlengths[i-1]; 1882 } 1883 a->nz = a->i[mbs]; 1884 1885 /* read in nonzero values */ 1886 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1887 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1888 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1889 1890 /* set "a" and "j" values into matrix */ 1891 nzcount = 0; jcount = 0; 1892 for (i=0; i<mbs; i++) { 1893 nzcountb = nzcount; 1894 nmask = 0; 1895 for (j=0; j<bs; j++) { 1896 kmax = rowlengths[i*bs+j]; 1897 for (k=0; k<kmax; k++) { 1898 tmp = jj[nzcount++]/bs; /* block col. index */ 1899 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1900 } 1901 } 1902 /* sort the masked values */ 1903 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1904 1905 /* set "j" values into matrix */ 1906 maskcount = 1; 1907 for (j=0; j<nmask; j++) { 1908 a->j[jcount++] = masked[j]; 1909 mask[masked[j]] = maskcount++; 1910 } 1911 1912 /* set "a" values into matrix */ 1913 ishift = bs2*a->i[i]; 1914 for (j=0; j<bs; j++) { 1915 kmax = rowlengths[i*bs+j]; 1916 for (k=0; k<kmax; k++) { 1917 tmp = jj[nzcountb]/bs ; /* block col. index */ 1918 if (tmp >= i){ 1919 block = mask[tmp] - 1; 1920 point = jj[nzcountb] - bs*tmp; 1921 idx = ishift + bs2*block + j + bs*point; 1922 a->a[idx] = aa[nzcountb]; 1923 } 1924 nzcountb++; 1925 } 1926 } 1927 /* zero out the mask elements we set */ 1928 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1929 } 1930 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1931 1932 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1933 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1934 ierr = PetscFree(aa);CHKERRQ(ierr); 1935 ierr = PetscFree(jj);CHKERRQ(ierr); 1936 ierr = PetscFree(mask);CHKERRQ(ierr); 1937 1938 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1939 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1940 ierr = MatView_Private(B);CHKERRQ(ierr); 1941 *A = B; 1942 PetscFunctionReturn(0); 1943 } 1944 1945 #undef __FUNCT__ 1946 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1947 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1948 { 1949 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1950 MatScalar *aa=a->a,*v,*v1; 1951 PetscScalar *x,*b,*t,sum,d; 1952 PetscErrorCode ierr; 1953 PetscInt m=a->mbs,bs=A->bs,*ai=a->i,*aj=a->j; 1954 PetscInt nz,nz1,*vj,*vj1,i; 1955 1956 PetscFunctionBegin; 1957 its = its*lits; 1958 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1959 1960 if (bs > 1) 1961 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1962 1963 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1964 if (xx != bb) { 1965 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1966 } else { 1967 b = x; 1968 } 1969 1970 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1971 1972 if (flag & SOR_ZERO_INITIAL_GUESS) { 1973 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1974 for (i=0; i<m; i++) 1975 t[i] = b[i]; 1976 1977 for (i=0; i<m; i++){ 1978 d = *(aa + ai[i]); /* diag[i] */ 1979 v = aa + ai[i] + 1; 1980 vj = aj + ai[i] + 1; 1981 nz = ai[i+1] - ai[i] - 1; 1982 x[i] = omega*t[i]/d; 1983 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 1984 PetscLogFlops(2*nz-1); 1985 } 1986 } 1987 1988 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1989 for (i=0; i<m; i++) 1990 t[i] = b[i]; 1991 1992 for (i=0; i<m-1; i++){ /* update rhs */ 1993 v = aa + ai[i] + 1; 1994 vj = aj + ai[i] + 1; 1995 nz = ai[i+1] - ai[i] - 1; 1996 while (nz--) t[*vj++] -= x[i]*(*v++); 1997 PetscLogFlops(2*nz-1); 1998 } 1999 for (i=m-1; i>=0; i--){ 2000 d = *(aa + ai[i]); 2001 v = aa + ai[i] + 1; 2002 vj = aj + ai[i] + 1; 2003 nz = ai[i+1] - ai[i] - 1; 2004 sum = t[i]; 2005 while (nz--) sum -= x[*vj++]*(*v++); 2006 PetscLogFlops(2*nz-1); 2007 x[i] = (1-omega)*x[i] + omega*sum/d; 2008 } 2009 } 2010 its--; 2011 } 2012 2013 while (its--) { 2014 /* 2015 forward sweep: 2016 for i=0,...,m-1: 2017 sum[i] = (b[i] - U(i,:)x )/d[i]; 2018 x[i] = (1-omega)x[i] + omega*sum[i]; 2019 b = b - x[i]*U^T(i,:); 2020 2021 */ 2022 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2023 for (i=0; i<m; i++) 2024 t[i] = b[i]; 2025 2026 for (i=0; i<m; i++){ 2027 d = *(aa + ai[i]); /* diag[i] */ 2028 v = aa + ai[i] + 1; v1=v; 2029 vj = aj + ai[i] + 1; vj1=vj; 2030 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2031 sum = t[i]; 2032 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2033 x[i] = (1-omega)*x[i] + omega*sum/d; 2034 while (nz--) t[*vj++] -= x[i]*(*v++); 2035 PetscLogFlops(4*nz-2); 2036 } 2037 } 2038 2039 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2040 /* 2041 backward sweep: 2042 b = b - x[i]*U^T(i,:), i=0,...,n-2 2043 for i=m-1,...,0: 2044 sum[i] = (b[i] - U(i,:)x )/d[i]; 2045 x[i] = (1-omega)x[i] + omega*sum[i]; 2046 */ 2047 for (i=0; i<m; i++) 2048 t[i] = b[i]; 2049 2050 for (i=0; i<m-1; i++){ /* update rhs */ 2051 v = aa + ai[i] + 1; 2052 vj = aj + ai[i] + 1; 2053 nz = ai[i+1] - ai[i] - 1; 2054 while (nz--) t[*vj++] -= x[i]*(*v++); 2055 PetscLogFlops(2*nz-1); 2056 } 2057 for (i=m-1; i>=0; i--){ 2058 d = *(aa + ai[i]); 2059 v = aa + ai[i] + 1; 2060 vj = aj + ai[i] + 1; 2061 nz = ai[i+1] - ai[i] - 1; 2062 sum = t[i]; 2063 while (nz--) sum -= x[*vj++]*(*v++); 2064 PetscLogFlops(2*nz-1); 2065 x[i] = (1-omega)*x[i] + omega*sum/d; 2066 } 2067 } 2068 } 2069 2070 ierr = PetscFree(t);CHKERRQ(ierr); 2071 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2072 if (bb != xx) { 2073 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2074 } 2075 PetscFunctionReturn(0); 2076 } 2077 2078 2079 2080 2081 2082 2083