1 2 /* 3 Defines the basic matrix operations for the SBAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 7 #include "src/inline/spops.h" 8 #include "src/mat/impls/sbaij/seq/sbaij.h" 9 10 #define CHUNKSIZE 10 11 12 /* 13 Checks for missing diagonals 14 */ 15 #undef __FUNCT__ 16 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 17 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A) 18 { 19 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 20 PetscErrorCode ierr; 21 PetscInt *diag,*jj = a->j,i; 22 23 PetscFunctionBegin; 24 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 25 diag = a->diag; 26 for (i=0; i<a->mbs; i++) { 27 if (jj[diag[i]] != i) SETERRQ1(PETSC_ERR_ARG_CORRUPT,"Matrix is missing diagonal number %D",i); 28 } 29 PetscFunctionReturn(0); 30 } 31 32 #undef __FUNCT__ 33 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 34 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A) 35 { 36 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 37 PetscErrorCode ierr; 38 PetscInt i,mbs = a->mbs; 39 40 PetscFunctionBegin; 41 if (a->diag) PetscFunctionReturn(0); 42 43 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 44 ierr = PetscLogObjectMemory(A,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 45 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 46 PetscFunctionReturn(0); 47 } 48 49 #undef __FUNCT__ 50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 51 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 52 { 53 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 54 PetscInt n = a->mbs,i; 55 56 PetscFunctionBegin; 57 *nn = n; 58 if (!ia) PetscFunctionReturn(0); 59 60 if (oshift == 1) { 61 /* temporarily add 1 to i and j indices */ 62 PetscInt nz = a->i[n]; 63 for (i=0; i<nz; i++) a->j[i]++; 64 for (i=0; i<n+1; i++) a->i[i]++; 65 *ia = a->i; *ja = a->j; 66 } else { 67 *ia = a->i; *ja = a->j; 68 } 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 74 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 75 { 76 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 77 PetscInt i,n = a->mbs; 78 79 PetscFunctionBegin; 80 if (!ia) PetscFunctionReturn(0); 81 82 if (oshift == 1) { 83 PetscInt nz = a->i[n]-1; 84 for (i=0; i<nz; i++) a->j[i]--; 85 for (i=0; i<n+1; i++) a->i[i]--; 86 } 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 92 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A) 93 { 94 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 95 PetscErrorCode ierr; 96 97 PetscFunctionBegin; 98 PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->m,a->nz); 99 ierr = PetscFree(a->a);CHKERRQ(ierr); 100 if (!a->singlemalloc) { 101 ierr = PetscFree(a->i);CHKERRQ(ierr); 102 ierr = PetscFree(a->j);CHKERRQ(ierr); 103 } 104 if (a->row) { 105 ierr = ISDestroy(a->row);CHKERRQ(ierr); 106 } 107 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 108 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 109 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 110 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 111 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 112 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 113 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 114 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 115 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 116 117 if (a->inew){ 118 ierr = PetscFree(a->inew);CHKERRQ(ierr); 119 a->inew = 0; 120 } 121 ierr = PetscFree(a);CHKERRQ(ierr); 122 123 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 124 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 125 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 126 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 127 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr); 128 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 129 PetscFunctionReturn(0); 130 } 131 132 #undef __FUNCT__ 133 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 134 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op) 135 { 136 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 137 138 PetscFunctionBegin; 139 switch (op) { 140 case MAT_ROW_ORIENTED: 141 a->roworiented = PETSC_TRUE; 142 break; 143 case MAT_COLUMN_ORIENTED: 144 a->roworiented = PETSC_FALSE; 145 break; 146 case MAT_COLUMNS_SORTED: 147 a->sorted = PETSC_TRUE; 148 break; 149 case MAT_COLUMNS_UNSORTED: 150 a->sorted = PETSC_FALSE; 151 break; 152 case MAT_KEEP_ZEROED_ROWS: 153 a->keepzeroedrows = PETSC_TRUE; 154 break; 155 case MAT_NO_NEW_NONZERO_LOCATIONS: 156 a->nonew = 1; 157 break; 158 case MAT_NEW_NONZERO_LOCATION_ERR: 159 a->nonew = -1; 160 break; 161 case MAT_NEW_NONZERO_ALLOCATION_ERR: 162 a->nonew = -2; 163 break; 164 case MAT_YES_NEW_NONZERO_LOCATIONS: 165 a->nonew = 0; 166 break; 167 case MAT_ROWS_SORTED: 168 case MAT_ROWS_UNSORTED: 169 case MAT_YES_NEW_DIAGONALS: 170 case MAT_IGNORE_OFF_PROC_ENTRIES: 171 case MAT_USE_HASH_TABLE: 172 PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n"); 173 break; 174 case MAT_NO_NEW_DIAGONALS: 175 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 176 case MAT_NOT_SYMMETRIC: 177 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 178 case MAT_HERMITIAN: 179 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 180 case MAT_SYMMETRIC: 181 case MAT_STRUCTURALLY_SYMMETRIC: 182 case MAT_NOT_HERMITIAN: 183 case MAT_SYMMETRY_ETERNAL: 184 case MAT_NOT_SYMMETRY_ETERNAL: 185 break; 186 default: 187 SETERRQ(PETSC_ERR_SUP,"unknown option"); 188 } 189 PetscFunctionReturn(0); 190 } 191 192 #undef __FUNCT__ 193 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 194 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v) 195 { 196 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 197 PetscErrorCode ierr; 198 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2; 199 MatScalar *aa,*aa_i; 200 PetscScalar *v_i; 201 202 PetscFunctionBegin; 203 bs = A->bs; 204 ai = a->i; 205 aj = a->j; 206 aa = a->a; 207 bs2 = a->bs2; 208 209 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row); 210 211 bn = row/bs; /* Block number */ 212 bp = row % bs; /* Block position */ 213 M = ai[bn+1] - ai[bn]; 214 *ncols = bs*M; 215 216 if (v) { 217 *v = 0; 218 if (*ncols) { 219 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 220 for (i=0; i<M; i++) { /* for each block in the block row */ 221 v_i = *v + i*bs; 222 aa_i = aa + bs2*(ai[bn] + i); 223 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 224 } 225 } 226 } 227 228 if (cols) { 229 *cols = 0; 230 if (*ncols) { 231 ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr); 232 for (i=0; i<M; i++) { /* for each block in the block row */ 233 cols_i = *cols + i*bs; 234 itmp = bs*aj[ai[bn] + i]; 235 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 236 } 237 } 238 } 239 240 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 241 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 242 #ifdef column_search 243 v_i = *v + M*bs; 244 cols_i = *cols + M*bs; 245 for (i=0; i<bn; i++){ /* for each block row */ 246 M = ai[i+1] - ai[i]; 247 for (j=0; j<M; j++){ 248 itmp = aj[ai[i] + j]; /* block column value */ 249 if (itmp == bn){ 250 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 251 for (k=0; k<bs; k++) { 252 *cols_i++ = i*bs+k; 253 *v_i++ = aa_i[k]; 254 } 255 *ncols += bs; 256 break; 257 } 258 } 259 } 260 #endif 261 262 PetscFunctionReturn(0); 263 } 264 265 #undef __FUNCT__ 266 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 267 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 268 { 269 PetscErrorCode ierr; 270 271 PetscFunctionBegin; 272 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 273 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 274 PetscFunctionReturn(0); 275 } 276 277 #undef __FUNCT__ 278 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 279 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B) 280 { 281 PetscErrorCode ierr; 282 PetscFunctionBegin; 283 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 284 PetscFunctionReturn(0); 285 } 286 287 #undef __FUNCT__ 288 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 289 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 290 { 291 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 292 PetscErrorCode ierr; 293 PetscInt i,j,bs = A->bs,k,l,bs2=a->bs2; 294 char *name; 295 PetscViewerFormat format; 296 297 PetscFunctionBegin; 298 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 299 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 300 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 301 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 302 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 303 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 304 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 305 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 306 for (i=0; i<a->mbs; i++) { 307 for (j=0; j<bs; j++) { 308 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 309 for (k=a->i[i]; k<a->i[i+1]; k++) { 310 for (l=0; l<bs; l++) { 311 #if defined(PETSC_USE_COMPLEX) 312 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 313 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 314 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 315 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 316 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 317 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 318 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 319 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 320 } 321 #else 322 if (a->a[bs2*k + l*bs + j] != 0.0) { 323 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 324 } 325 #endif 326 } 327 } 328 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 329 } 330 } 331 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 332 } else { 333 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 334 for (i=0; i<a->mbs; i++) { 335 for (j=0; j<bs; j++) { 336 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 337 for (k=a->i[i]; k<a->i[i+1]; k++) { 338 for (l=0; l<bs; l++) { 339 #if defined(PETSC_USE_COMPLEX) 340 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 341 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 342 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 343 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 344 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 345 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 346 } else { 347 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 348 } 349 #else 350 ierr = PetscViewerASCIIPrintf(viewer," %D %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 351 #endif 352 } 353 } 354 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 355 } 356 } 357 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 358 } 359 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 360 PetscFunctionReturn(0); 361 } 362 363 #undef __FUNCT__ 364 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 365 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 366 { 367 Mat A = (Mat) Aa; 368 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 369 PetscErrorCode ierr; 370 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2; 371 PetscMPIInt rank; 372 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 373 MatScalar *aa; 374 MPI_Comm comm; 375 PetscViewer viewer; 376 377 PetscFunctionBegin; 378 /* 379 This is nasty. If this is called from an originally parallel matrix 380 then all processes call this,but only the first has the matrix so the 381 rest should return immediately. 382 */ 383 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 384 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 385 if (rank) PetscFunctionReturn(0); 386 387 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 388 389 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 390 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 391 392 /* loop over matrix elements drawing boxes */ 393 color = PETSC_DRAW_BLUE; 394 for (i=0,row=0; i<mbs; i++,row+=bs) { 395 for (j=a->i[i]; j<a->i[i+1]; j++) { 396 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 397 x_l = a->j[j]*bs; x_r = x_l + 1.0; 398 aa = a->a + j*bs2; 399 for (k=0; k<bs; k++) { 400 for (l=0; l<bs; l++) { 401 if (PetscRealPart(*aa++) >= 0.) continue; 402 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 403 } 404 } 405 } 406 } 407 color = PETSC_DRAW_CYAN; 408 for (i=0,row=0; i<mbs; i++,row+=bs) { 409 for (j=a->i[i]; j<a->i[i+1]; j++) { 410 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 411 x_l = a->j[j]*bs; x_r = x_l + 1.0; 412 aa = a->a + j*bs2; 413 for (k=0; k<bs; k++) { 414 for (l=0; l<bs; l++) { 415 if (PetscRealPart(*aa++) != 0.) continue; 416 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 417 } 418 } 419 } 420 } 421 422 color = PETSC_DRAW_RED; 423 for (i=0,row=0; i<mbs; i++,row+=bs) { 424 for (j=a->i[i]; j<a->i[i+1]; j++) { 425 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 426 x_l = a->j[j]*bs; x_r = x_l + 1.0; 427 aa = a->a + j*bs2; 428 for (k=0; k<bs; k++) { 429 for (l=0; l<bs; l++) { 430 if (PetscRealPart(*aa++) <= 0.) continue; 431 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 432 } 433 } 434 } 435 } 436 PetscFunctionReturn(0); 437 } 438 439 #undef __FUNCT__ 440 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 441 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 442 { 443 PetscErrorCode ierr; 444 PetscReal xl,yl,xr,yr,w,h; 445 PetscDraw draw; 446 PetscTruth isnull; 447 448 PetscFunctionBegin; 449 450 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 451 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 452 453 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 454 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 455 xr += w; yr += h; xl = -w; yl = -h; 456 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 457 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 458 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 459 PetscFunctionReturn(0); 460 } 461 462 #undef __FUNCT__ 463 #define __FUNCT__ "MatView_SeqSBAIJ" 464 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 465 { 466 PetscErrorCode ierr; 467 PetscTruth iascii,isdraw; 468 469 PetscFunctionBegin; 470 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 471 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 472 if (iascii){ 473 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 474 } else if (isdraw) { 475 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 476 } else { 477 Mat B; 478 ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr); 479 ierr = MatView(B,viewer);CHKERRQ(ierr); 480 ierr = MatDestroy(B);CHKERRQ(ierr); 481 } 482 PetscFunctionReturn(0); 483 } 484 485 486 #undef __FUNCT__ 487 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 488 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 489 { 490 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 491 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 492 PetscInt *ai = a->i,*ailen = a->ilen; 493 PetscInt brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2; 494 MatScalar *ap,*aa = a->a,zero = 0.0; 495 496 PetscFunctionBegin; 497 for (k=0; k<m; k++) { /* loop over rows */ 498 row = im[k]; brow = row/bs; 499 if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row); 500 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 501 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 502 nrow = ailen[brow]; 503 for (l=0; l<n; l++) { /* loop over columns */ 504 if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]); 505 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1); 506 col = in[l] ; 507 bcol = col/bs; 508 cidx = col%bs; 509 ridx = row%bs; 510 high = nrow; 511 low = 0; /* assume unsorted */ 512 while (high-low > 5) { 513 t = (low+high)/2; 514 if (rp[t] > bcol) high = t; 515 else low = t; 516 } 517 for (i=low; i<high; i++) { 518 if (rp[i] > bcol) break; 519 if (rp[i] == bcol) { 520 *v++ = ap[bs2*i+bs*cidx+ridx]; 521 goto finished; 522 } 523 } 524 *v++ = zero; 525 finished:; 526 } 527 } 528 PetscFunctionReturn(0); 529 } 530 531 532 #undef __FUNCT__ 533 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 534 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 535 { 536 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 537 PetscErrorCode ierr; 538 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 539 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 540 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval; 541 PetscTruth roworiented=a->roworiented; 542 const MatScalar *value = v; 543 MatScalar *ap,*aa = a->a,*bap; 544 545 PetscFunctionBegin; 546 if (roworiented) { 547 stepval = (n-1)*bs; 548 } else { 549 stepval = (m-1)*bs; 550 } 551 for (k=0; k<m; k++) { /* loop over added rows */ 552 row = im[k]; 553 if (row < 0) continue; 554 #if defined(PETSC_USE_DEBUG) 555 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 556 #endif 557 rp = aj + ai[row]; 558 ap = aa + bs2*ai[row]; 559 rmax = imax[row]; 560 nrow = ailen[row]; 561 low = 0; 562 for (l=0; l<n; l++) { /* loop over added columns */ 563 if (in[l] < 0) continue; 564 col = in[l]; 565 #if defined(PETSC_USE_DEBUG) 566 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1); 567 #endif 568 if (col < row) continue; /* ignore lower triangular block */ 569 if (roworiented) { 570 value = v + k*(stepval+bs)*bs + l*bs; 571 } else { 572 value = v + l*(stepval+bs)*bs + k*bs; 573 } 574 if (col < lastcol) low = 0; high = nrow; 575 lastcol = col; 576 while (high-low > 7) { 577 t = (low+high)/2; 578 if (rp[t] > col) high = t; 579 else low = t; 580 } 581 for (i=low; i<high; i++) { 582 if (rp[i] > col) break; 583 if (rp[i] == col) { 584 bap = ap + bs2*i; 585 if (roworiented) { 586 if (is == ADD_VALUES) { 587 for (ii=0; ii<bs; ii++,value+=stepval) { 588 for (jj=ii; jj<bs2; jj+=bs) { 589 bap[jj] += *value++; 590 } 591 } 592 } else { 593 for (ii=0; ii<bs; ii++,value+=stepval) { 594 for (jj=ii; jj<bs2; jj+=bs) { 595 bap[jj] = *value++; 596 } 597 } 598 } 599 } else { 600 if (is == ADD_VALUES) { 601 for (ii=0; ii<bs; ii++,value+=stepval) { 602 for (jj=0; jj<bs; jj++) { 603 *bap++ += *value++; 604 } 605 } 606 } else { 607 for (ii=0; ii<bs; ii++,value+=stepval) { 608 for (jj=0; jj<bs; jj++) { 609 *bap++ = *value++; 610 } 611 } 612 } 613 } 614 goto noinsert2; 615 } 616 } 617 if (nonew == 1) goto noinsert2; 618 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 619 if (nrow >= rmax) { 620 /* there is no extra room in row, therefore enlarge */ 621 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 622 MatScalar *new_a; 623 624 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 625 626 /* malloc new storage space */ 627 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); 628 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 629 new_j = (PetscInt*)(new_a + bs2*new_nz); 630 new_i = new_j + new_nz; 631 632 /* copy over old data into new slots */ 633 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 634 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 635 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 636 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 637 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 638 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 639 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 640 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 641 /* free up old matrix storage */ 642 ierr = PetscFree(a->a);CHKERRQ(ierr); 643 if (!a->singlemalloc) { 644 ierr = PetscFree(a->i);CHKERRQ(ierr); 645 ierr = PetscFree(a->j);CHKERRQ(ierr); 646 } 647 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 648 a->singlemalloc = PETSC_TRUE; 649 650 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 651 rmax = imax[row] = imax[row] + CHUNKSIZE; 652 ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); 653 a->maxnz += bs2*CHUNKSIZE; 654 a->reallocs++; 655 a->nz++; 656 } 657 N = nrow++ - 1; 658 /* shift up all the later entries in this row */ 659 for (ii=N; ii>=i; ii--) { 660 rp[ii+1] = rp[ii]; 661 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 662 } 663 if (N >= i) { 664 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 665 } 666 rp[i] = col; 667 bap = ap + bs2*i; 668 if (roworiented) { 669 for (ii=0; ii<bs; ii++,value+=stepval) { 670 for (jj=ii; jj<bs2; jj+=bs) { 671 bap[jj] = *value++; 672 } 673 } 674 } else { 675 for (ii=0; ii<bs; ii++,value+=stepval) { 676 for (jj=0; jj<bs; jj++) { 677 *bap++ = *value++; 678 } 679 } 680 } 681 noinsert2:; 682 low = i; 683 } 684 ailen[row] = nrow; 685 } 686 PetscFunctionReturn(0); 687 } 688 689 #undef __FUNCT__ 690 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 691 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 692 { 693 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 694 PetscErrorCode ierr; 695 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 696 PetscInt m = A->m,*ip,N,*ailen = a->ilen; 697 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 698 MatScalar *aa = a->a,*ap; 699 700 PetscFunctionBegin; 701 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 702 703 if (m) rmax = ailen[0]; 704 for (i=1; i<mbs; i++) { 705 /* move each row back by the amount of empty slots (fshift) before it*/ 706 fshift += imax[i-1] - ailen[i-1]; 707 rmax = PetscMax(rmax,ailen[i]); 708 if (fshift) { 709 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 710 N = ailen[i]; 711 for (j=0; j<N; j++) { 712 ip[j-fshift] = ip[j]; 713 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 714 } 715 } 716 ai[i] = ai[i-1] + ailen[i-1]; 717 } 718 if (mbs) { 719 fshift += imax[mbs-1] - ailen[mbs-1]; 720 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 721 } 722 /* reset ilen and imax for each row */ 723 for (i=0; i<mbs; i++) { 724 ailen[i] = imax[i] = ai[i+1] - ai[i]; 725 } 726 a->nz = ai[mbs]; 727 728 /* diagonals may have moved, reset it */ 729 if (a->diag) { 730 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 731 } 732 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n", 733 m,A->m,A->bs,fshift*bs2,a->nz*bs2); 734 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %D\n", 735 a->reallocs); 736 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %D\n",rmax); 737 a->reallocs = 0; 738 A->info.nz_unneeded = (PetscReal)fshift*bs2; 739 740 PetscFunctionReturn(0); 741 } 742 743 /* 744 This function returns an array of flags which indicate the locations of contiguous 745 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 746 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 747 Assume: sizes should be long enough to hold all the values. 748 */ 749 #undef __FUNCT__ 750 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 751 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 752 { 753 PetscInt i,j,k,row; 754 PetscTruth flg; 755 756 PetscFunctionBegin; 757 for (i=0,j=0; i<n; j++) { 758 row = idx[i]; 759 if (row%bs!=0) { /* Not the begining of a block */ 760 sizes[j] = 1; 761 i++; 762 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 763 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 764 i++; 765 } else { /* Begining of the block, so check if the complete block exists */ 766 flg = PETSC_TRUE; 767 for (k=1; k<bs; k++) { 768 if (row+k != idx[i+k]) { /* break in the block */ 769 flg = PETSC_FALSE; 770 break; 771 } 772 } 773 if (flg) { /* No break in the bs */ 774 sizes[j] = bs; 775 i+= bs; 776 } else { 777 sizes[j] = 1; 778 i++; 779 } 780 } 781 } 782 *bs_max = j; 783 PetscFunctionReturn(0); 784 } 785 786 787 /* Only add/insert a(i,j) with i<=j (blocks). 788 Any a(i,j) with i>j input by user is ingored. 789 */ 790 791 #undef __FUNCT__ 792 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 793 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 794 { 795 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 796 PetscErrorCode ierr; 797 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 798 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 799 PetscInt *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol; 800 PetscInt ridx,cidx,bs2=a->bs2; 801 MatScalar *ap,value,*aa=a->a,*bap; 802 803 PetscFunctionBegin; 804 805 for (k=0; k<m; k++) { /* loop over added rows */ 806 row = im[k]; /* row number */ 807 brow = row/bs; /* block row number */ 808 if (row < 0) continue; 809 #if defined(PETSC_USE_DEBUG) 810 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 811 #endif 812 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 813 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 814 rmax = imax[brow]; /* maximum space allocated for this row */ 815 nrow = ailen[brow]; /* actual length of this row */ 816 low = 0; 817 818 for (l=0; l<n; l++) { /* loop over added columns */ 819 if (in[l] < 0) continue; 820 #if defined(PETSC_USE_DEBUG) 821 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->m-1); 822 #endif 823 col = in[l]; 824 bcol = col/bs; /* block col number */ 825 826 if (brow <= bcol){ 827 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 828 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 829 /* element value a(k,l) */ 830 if (roworiented) { 831 value = v[l + k*n]; 832 } else { 833 value = v[k + l*m]; 834 } 835 836 /* move pointer bap to a(k,l) quickly and add/insert value */ 837 if (col < lastcol) low = 0; high = nrow; 838 lastcol = col; 839 while (high-low > 7) { 840 t = (low+high)/2; 841 if (rp[t] > bcol) high = t; 842 else low = t; 843 } 844 for (i=low; i<high; i++) { 845 /* printf("The loop of i=low.., rp[%D]=%D\n",i,rp[i]); */ 846 if (rp[i] > bcol) break; 847 if (rp[i] == bcol) { 848 bap = ap + bs2*i + bs*cidx + ridx; 849 if (is == ADD_VALUES) *bap += value; 850 else *bap = value; 851 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 852 if (brow == bcol && ridx < cidx){ 853 bap = ap + bs2*i + bs*ridx + cidx; 854 if (is == ADD_VALUES) *bap += value; 855 else *bap = value; 856 } 857 goto noinsert1; 858 } 859 } 860 861 if (nonew == 1) goto noinsert1; 862 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 863 if (nrow >= rmax) { 864 /* there is no extra room in row, therefore enlarge */ 865 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 866 MatScalar *new_a; 867 868 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 869 870 /* Malloc new storage space */ 871 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); 872 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 873 new_j = (PetscInt*)(new_a + bs2*new_nz); 874 new_i = new_j + new_nz; 875 876 /* copy over old data into new slots */ 877 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 878 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 879 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 880 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 881 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 882 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 883 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 884 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 885 /* free up old matrix storage */ 886 ierr = PetscFree(a->a);CHKERRQ(ierr); 887 if (!a->singlemalloc) { 888 ierr = PetscFree(a->i);CHKERRQ(ierr); 889 ierr = PetscFree(a->j);CHKERRQ(ierr); 890 } 891 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 892 a->singlemalloc = PETSC_TRUE; 893 894 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 895 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 896 ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); 897 a->maxnz += bs2*CHUNKSIZE; 898 a->reallocs++; 899 a->nz++; 900 } 901 902 N = nrow++ - 1; 903 /* shift up all the later entries in this row */ 904 for (ii=N; ii>=i; ii--) { 905 rp[ii+1] = rp[ii]; 906 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 907 } 908 if (N>=i) { 909 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 910 } 911 rp[i] = bcol; 912 ap[bs2*i + bs*cidx + ridx] = value; 913 noinsert1:; 914 low = i; 915 /* } */ 916 } 917 } /* end of if .. if.. */ 918 } /* end of loop over added columns */ 919 ailen[brow] = nrow; 920 } /* end of loop over added rows */ 921 922 PetscFunctionReturn(0); 923 } 924 925 EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*); 926 EXTERN PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*); 927 EXTERN PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat,PetscInt,IS[],PetscInt); 928 EXTERN PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,PetscInt,MatReuse,Mat*); 929 EXTERN PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat*[]); 930 EXTERN PetscErrorCode MatScale_SeqSBAIJ(const PetscScalar*,Mat); 931 EXTERN PetscErrorCode MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 932 EXTERN PetscErrorCode MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 933 EXTERN PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat,Vec); 934 EXTERN PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 935 EXTERN PetscErrorCode MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 936 EXTERN PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat); 937 EXTERN PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat,Vec); 938 939 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 940 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 941 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 942 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 943 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 944 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 945 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 946 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 947 948 EXTERN PetscErrorCode MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs); 949 950 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 951 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 952 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 953 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 954 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 955 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 956 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 957 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 958 959 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,MatFactorInfo*,Mat*); 960 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,MatFactorInfo*,Mat*); 961 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,MatFactorInfo*,Mat*); 962 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,MatFactorInfo*,Mat*); 963 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,MatFactorInfo*,Mat*); 964 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,MatFactorInfo*,Mat*); 965 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,MatFactorInfo*,Mat*); 966 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,MatFactorInfo*,Mat*); 967 EXTERN PetscErrorCode MatGetInertia_SeqSBAIJ(Mat,PetscInt*,PetscInt*,PetscInt*); 968 969 EXTERN PetscErrorCode MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 970 EXTERN PetscErrorCode MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 971 EXTERN PetscErrorCode MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 972 EXTERN PetscErrorCode MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 973 EXTERN PetscErrorCode MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 974 EXTERN PetscErrorCode MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 975 EXTERN PetscErrorCode MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 976 EXTERN PetscErrorCode MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 977 978 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 979 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 980 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 981 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 982 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 983 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 984 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 985 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 986 987 #ifdef HAVE_MatICCFactor 988 /* modified from MatILUFactor_SeqSBAIJ, needs further work! */ 989 #undef __FUNCT__ 990 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 991 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 992 { 993 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 994 Mat outA; 995 PetscErrorCode ierr; 996 PetscTruth row_identity,col_identity; 997 998 PetscFunctionBegin; 999 outA = inA; 1000 inA->factor = FACTOR_CHOLESKY; 1001 1002 if (!a->diag) { 1003 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 1004 } 1005 /* 1006 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1007 for ILU(0) factorization with natural ordering 1008 */ 1009 switch (a->bs) { 1010 case 1: 1011 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1012 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1013 inA->ops->solves = MatSolves_SeqSBAIJ_1; 1014 PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1015 case 2: 1016 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1017 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1018 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1019 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1020 break; 1021 case 3: 1022 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1023 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1024 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1025 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1026 break; 1027 case 4: 1028 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1029 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1030 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1031 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1032 break; 1033 case 5: 1034 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1035 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1036 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1037 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1038 break; 1039 case 6: 1040 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1041 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1042 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1043 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1044 break; 1045 case 7: 1046 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1047 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1048 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1049 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1050 break; 1051 default: 1052 a->row = row; 1053 a->icol = col; 1054 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1055 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1056 1057 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1058 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1059 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 1060 1061 if (!a->solve_work) { 1062 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1063 ierr = PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1064 } 1065 } 1066 1067 ierr = MatCholeskyFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1068 1069 PetscFunctionReturn(0); 1070 } 1071 #endif 1072 1073 #undef __FUNCT__ 1074 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1075 PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat A) 1076 { 1077 static PetscTruth called = PETSC_FALSE; 1078 MPI_Comm comm = A->comm; 1079 PetscErrorCode ierr; 1080 1081 PetscFunctionBegin; 1082 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1083 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1084 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1085 PetscFunctionReturn(0); 1086 } 1087 1088 EXTERN_C_BEGIN 1089 #undef __FUNCT__ 1090 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1091 PetscErrorCode MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices) 1092 { 1093 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1094 PetscInt i,nz,n; 1095 1096 PetscFunctionBegin; 1097 nz = baij->maxnz; 1098 n = mat->n; 1099 for (i=0; i<nz; i++) { 1100 baij->j[i] = indices[i]; 1101 } 1102 baij->nz = nz; 1103 for (i=0; i<n; i++) { 1104 baij->ilen[i] = baij->imax[i]; 1105 } 1106 1107 PetscFunctionReturn(0); 1108 } 1109 EXTERN_C_END 1110 1111 #undef __FUNCT__ 1112 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1113 /*@ 1114 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1115 in the matrix. 1116 1117 Input Parameters: 1118 + mat - the SeqSBAIJ matrix 1119 - indices - the column indices 1120 1121 Level: advanced 1122 1123 Notes: 1124 This can be called if you have precomputed the nonzero structure of the 1125 matrix and want to provide it to the matrix object to improve the performance 1126 of the MatSetValues() operation. 1127 1128 You MUST have set the correct numbers of nonzeros per row in the call to 1129 MatCreateSeqSBAIJ(). 1130 1131 MUST be called before any calls to MatSetValues() 1132 1133 .seealso: MatCreateSeqSBAIJ 1134 @*/ 1135 PetscErrorCode MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1136 { 1137 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1138 1139 PetscFunctionBegin; 1140 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1141 PetscValidPointer(indices,2); 1142 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1143 if (f) { 1144 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1145 } else { 1146 SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices"); 1147 } 1148 PetscFunctionReturn(0); 1149 } 1150 1151 #undef __FUNCT__ 1152 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1153 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A) 1154 { 1155 PetscErrorCode ierr; 1156 1157 PetscFunctionBegin; 1158 ierr = MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1159 PetscFunctionReturn(0); 1160 } 1161 1162 #undef __FUNCT__ 1163 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1164 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1165 { 1166 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1167 PetscFunctionBegin; 1168 *array = a->a; 1169 PetscFunctionReturn(0); 1170 } 1171 1172 #undef __FUNCT__ 1173 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1174 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1175 { 1176 PetscFunctionBegin; 1177 PetscFunctionReturn(0); 1178 } 1179 1180 #include "petscblaslapack.h" 1181 #undef __FUNCT__ 1182 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1183 PetscErrorCode MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1184 { 1185 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1186 PetscErrorCode ierr; 1187 PetscInt i,bs=Y->bs,bs2,j; 1188 PetscBLASInt bnz = (PetscBLASInt)x->nz,one = 1; 1189 1190 PetscFunctionBegin; 1191 if (str == SAME_NONZERO_PATTERN) { 1192 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1193 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1194 if (y->xtoy && y->XtoY != X) { 1195 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1196 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1197 } 1198 if (!y->xtoy) { /* get xtoy */ 1199 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1200 y->XtoY = X; 1201 } 1202 bs2 = bs*bs; 1203 for (i=0; i<x->nz; i++) { 1204 j = 0; 1205 while (j < bs2){ 1206 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1207 j++; 1208 } 1209 } 1210 PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)); 1211 } else { 1212 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1213 } 1214 PetscFunctionReturn(0); 1215 } 1216 1217 #undef __FUNCT__ 1218 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1219 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1220 { 1221 PetscFunctionBegin; 1222 *flg = PETSC_TRUE; 1223 PetscFunctionReturn(0); 1224 } 1225 1226 #undef __FUNCT__ 1227 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1228 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1229 { 1230 PetscFunctionBegin; 1231 *flg = PETSC_TRUE; 1232 PetscFunctionReturn(0); 1233 } 1234 1235 #undef __FUNCT__ 1236 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1237 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg) 1238 { 1239 PetscFunctionBegin; 1240 *flg = PETSC_FALSE; 1241 PetscFunctionReturn(0); 1242 } 1243 1244 /* -------------------------------------------------------------------*/ 1245 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1246 MatGetRow_SeqSBAIJ, 1247 MatRestoreRow_SeqSBAIJ, 1248 MatMult_SeqSBAIJ_N, 1249 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1250 MatMult_SeqSBAIJ_N, 1251 MatMultAdd_SeqSBAIJ_N, 1252 MatSolve_SeqSBAIJ_N, 1253 0, 1254 0, 1255 /*10*/ 0, 1256 0, 1257 MatCholeskyFactor_SeqSBAIJ, 1258 MatRelax_SeqSBAIJ, 1259 MatTranspose_SeqSBAIJ, 1260 /*15*/ MatGetInfo_SeqSBAIJ, 1261 MatEqual_SeqSBAIJ, 1262 MatGetDiagonal_SeqSBAIJ, 1263 MatDiagonalScale_SeqSBAIJ, 1264 MatNorm_SeqSBAIJ, 1265 /*20*/ 0, 1266 MatAssemblyEnd_SeqSBAIJ, 1267 0, 1268 MatSetOption_SeqSBAIJ, 1269 MatZeroEntries_SeqSBAIJ, 1270 /*25*/ 0, 1271 0, 1272 0, 1273 MatCholeskyFactorSymbolic_SeqSBAIJ, 1274 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1275 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1276 0, 1277 MatICCFactorSymbolic_SeqSBAIJ, 1278 MatGetArray_SeqSBAIJ, 1279 MatRestoreArray_SeqSBAIJ, 1280 /*35*/ MatDuplicate_SeqSBAIJ, 1281 0, 1282 0, 1283 0, 1284 0, 1285 /*40*/ MatAXPY_SeqSBAIJ, 1286 MatGetSubMatrices_SeqSBAIJ, 1287 MatIncreaseOverlap_SeqSBAIJ, 1288 MatGetValues_SeqSBAIJ, 1289 0, 1290 /*45*/ MatPrintHelp_SeqSBAIJ, 1291 MatScale_SeqSBAIJ, 1292 0, 1293 0, 1294 0, 1295 /*50*/ 0, 1296 MatGetRowIJ_SeqSBAIJ, 1297 MatRestoreRowIJ_SeqSBAIJ, 1298 0, 1299 0, 1300 /*55*/ 0, 1301 0, 1302 0, 1303 0, 1304 MatSetValuesBlocked_SeqSBAIJ, 1305 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1306 0, 1307 0, 1308 MatGetPetscMaps_Petsc, 1309 0, 1310 /*65*/ 0, 1311 0, 1312 0, 1313 0, 1314 0, 1315 /*70*/ MatGetRowMax_SeqSBAIJ, 1316 0, 1317 0, 1318 0, 1319 0, 1320 /*75*/ 0, 1321 0, 1322 0, 1323 0, 1324 0, 1325 /*80*/ 0, 1326 0, 1327 0, 1328 #if !defined(PETSC_USE_COMPLEX) 1329 MatGetInertia_SeqSBAIJ, 1330 #else 1331 0, 1332 #endif 1333 MatLoad_SeqSBAIJ, 1334 /*85*/ MatIsSymmetric_SeqSBAIJ, 1335 MatIsHermitian_SeqSBAIJ, 1336 MatIsStructurallySymmetric_SeqSBAIJ, 1337 0, 1338 0, 1339 /*90*/ 0, 1340 0, 1341 0, 1342 0, 1343 0, 1344 /*95*/ 0, 1345 0, 1346 0, 1347 0}; 1348 1349 EXTERN_C_BEGIN 1350 #undef __FUNCT__ 1351 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1352 PetscErrorCode MatStoreValues_SeqSBAIJ(Mat mat) 1353 { 1354 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1355 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 1356 PetscErrorCode ierr; 1357 1358 PetscFunctionBegin; 1359 if (aij->nonew != 1) { 1360 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1361 } 1362 1363 /* allocate space for values if not already there */ 1364 if (!aij->saved_values) { 1365 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1366 } 1367 1368 /* copy values over */ 1369 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1370 PetscFunctionReturn(0); 1371 } 1372 EXTERN_C_END 1373 1374 EXTERN_C_BEGIN 1375 #undef __FUNCT__ 1376 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1377 PetscErrorCode MatRetrieveValues_SeqSBAIJ(Mat mat) 1378 { 1379 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1380 PetscErrorCode ierr; 1381 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 1382 1383 PetscFunctionBegin; 1384 if (aij->nonew != 1) { 1385 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1386 } 1387 if (!aij->saved_values) { 1388 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1389 } 1390 1391 /* copy values over */ 1392 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1393 PetscFunctionReturn(0); 1394 } 1395 EXTERN_C_END 1396 1397 EXTERN_C_BEGIN 1398 #undef __FUNCT__ 1399 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1400 PetscErrorCode MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 1401 { 1402 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1403 PetscErrorCode ierr; 1404 PetscInt i,len,mbs,bs2; 1405 PetscTruth flg; 1406 1407 PetscFunctionBegin; 1408 B->preallocated = PETSC_TRUE; 1409 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1410 mbs = B->m/bs; 1411 bs2 = bs*bs; 1412 1413 if (mbs*bs != B->m) { 1414 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1415 } 1416 1417 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1418 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 1419 if (nnz) { 1420 for (i=0; i<mbs; i++) { 1421 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 1422 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs); 1423 } 1424 } 1425 1426 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1427 if (!flg) { 1428 switch (bs) { 1429 case 1: 1430 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1431 B->ops->solve = MatSolve_SeqSBAIJ_1; 1432 B->ops->solves = MatSolves_SeqSBAIJ_1; 1433 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1434 B->ops->mult = MatMult_SeqSBAIJ_1; 1435 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1436 break; 1437 case 2: 1438 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1439 B->ops->solve = MatSolve_SeqSBAIJ_2; 1440 B->ops->solvetranspose = MatSolve_SeqSBAIJ_2; 1441 B->ops->mult = MatMult_SeqSBAIJ_2; 1442 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1443 break; 1444 case 3: 1445 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1446 B->ops->solve = MatSolve_SeqSBAIJ_3; 1447 B->ops->solvetranspose = MatSolve_SeqSBAIJ_3; 1448 B->ops->mult = MatMult_SeqSBAIJ_3; 1449 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1450 break; 1451 case 4: 1452 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1453 B->ops->solve = MatSolve_SeqSBAIJ_4; 1454 B->ops->solvetranspose = MatSolve_SeqSBAIJ_4; 1455 B->ops->mult = MatMult_SeqSBAIJ_4; 1456 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1457 break; 1458 case 5: 1459 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1460 B->ops->solve = MatSolve_SeqSBAIJ_5; 1461 B->ops->solvetranspose = MatSolve_SeqSBAIJ_5; 1462 B->ops->mult = MatMult_SeqSBAIJ_5; 1463 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1464 break; 1465 case 6: 1466 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1467 B->ops->solve = MatSolve_SeqSBAIJ_6; 1468 B->ops->solvetranspose = MatSolve_SeqSBAIJ_6; 1469 B->ops->mult = MatMult_SeqSBAIJ_6; 1470 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1471 break; 1472 case 7: 1473 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1474 B->ops->solve = MatSolve_SeqSBAIJ_7; 1475 B->ops->solvetranspose = MatSolve_SeqSBAIJ_7; 1476 B->ops->mult = MatMult_SeqSBAIJ_7; 1477 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1478 break; 1479 } 1480 } 1481 1482 b->mbs = mbs; 1483 b->nbs = mbs; 1484 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->imax);CHKERRQ(ierr); 1485 if (!nnz) { 1486 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1487 else if (nz <= 0) nz = 1; 1488 for (i=0; i<mbs; i++) { 1489 b->imax[i] = nz; 1490 } 1491 nz = nz*mbs; /* total nz */ 1492 } else { 1493 nz = 0; 1494 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1495 } 1496 /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1497 1498 /* allocate the matrix space */ 1499 len = nz*sizeof(PetscInt) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(PetscInt); 1500 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1501 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1502 b->j = (PetscInt*)(b->a + nz*bs2); 1503 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1504 b->i = b->j + nz; 1505 b->singlemalloc = PETSC_TRUE; 1506 1507 /* pointer to beginning of each row */ 1508 b->i[0] = 0; 1509 for (i=1; i<mbs+1; i++) { 1510 b->i[i] = b->i[i-1] + b->imax[i-1]; 1511 } 1512 1513 /* b->ilen will count nonzeros in each block row so far. */ 1514 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->ilen);CHKERRQ(ierr); 1515 ierr = PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1516 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1517 1518 B->bs = bs; 1519 b->bs2 = bs2; 1520 b->nz = 0; 1521 b->maxnz = nz*bs2; 1522 1523 b->inew = 0; 1524 b->jnew = 0; 1525 b->anew = 0; 1526 b->a2anew = 0; 1527 b->permute = PETSC_FALSE; 1528 PetscFunctionReturn(0); 1529 } 1530 EXTERN_C_END 1531 1532 EXTERN_C_BEGIN 1533 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqAIJ(Mat,const MatType,Mat*); 1534 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqBAIJ(Mat,const MatType,Mat*); 1535 EXTERN_C_END 1536 1537 /*MC 1538 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1539 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1540 1541 Options Database Keys: 1542 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1543 1544 Level: beginner 1545 1546 .seealso: MatCreateSeqSBAIJ 1547 M*/ 1548 1549 EXTERN_C_BEGIN 1550 #undef __FUNCT__ 1551 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1552 PetscErrorCode MatCreate_SeqSBAIJ(Mat B) 1553 { 1554 Mat_SeqSBAIJ *b; 1555 PetscErrorCode ierr; 1556 PetscMPIInt size; 1557 1558 PetscFunctionBegin; 1559 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1560 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1561 B->m = B->M = PetscMax(B->m,B->M); 1562 B->n = B->N = PetscMax(B->n,B->N); 1563 1564 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1565 B->data = (void*)b; 1566 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1567 B->ops->destroy = MatDestroy_SeqSBAIJ; 1568 B->ops->view = MatView_SeqSBAIJ; 1569 B->factor = 0; 1570 B->lupivotthreshold = 1.0; 1571 B->mapping = 0; 1572 b->row = 0; 1573 b->icol = 0; 1574 b->reallocs = 0; 1575 b->saved_values = 0; 1576 1577 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1578 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1579 1580 b->sorted = PETSC_FALSE; 1581 b->roworiented = PETSC_TRUE; 1582 b->nonew = 0; 1583 b->diag = 0; 1584 b->solve_work = 0; 1585 b->mult_work = 0; 1586 B->spptr = 0; 1587 b->keepzeroedrows = PETSC_FALSE; 1588 b->xtoy = 0; 1589 b->XtoY = 0; 1590 1591 b->inew = 0; 1592 b->jnew = 0; 1593 b->anew = 0; 1594 b->a2anew = 0; 1595 b->permute = PETSC_FALSE; 1596 1597 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1598 "MatStoreValues_SeqSBAIJ", 1599 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1600 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1601 "MatRetrieveValues_SeqSBAIJ", 1602 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1603 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1604 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1605 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1606 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C", 1607 "MatConvert_SeqSBAIJ_SeqAIJ", 1608 MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr); 1609 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C", 1610 "MatConvert_SeqSBAIJ_SeqBAIJ", 1611 MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr); 1612 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1613 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1614 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1615 1616 B->symmetric = PETSC_TRUE; 1617 B->structurally_symmetric = PETSC_TRUE; 1618 B->symmetric_set = PETSC_TRUE; 1619 B->structurally_symmetric_set = PETSC_TRUE; 1620 PetscFunctionReturn(0); 1621 } 1622 EXTERN_C_END 1623 1624 #undef __FUNCT__ 1625 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1626 /*@C 1627 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1628 compressed row) format. For good matrix assembly performance the 1629 user should preallocate the matrix storage by setting the parameter nz 1630 (or the array nnz). By setting these parameters accurately, performance 1631 during matrix assembly can be increased by more than a factor of 50. 1632 1633 Collective on Mat 1634 1635 Input Parameters: 1636 + A - the symmetric matrix 1637 . bs - size of block 1638 . nz - number of block nonzeros per block row (same for all rows) 1639 - nnz - array containing the number of block nonzeros in the upper triangular plus 1640 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1641 1642 Options Database Keys: 1643 . -mat_no_unroll - uses code that does not unroll the loops in the 1644 block calculations (much slower) 1645 . -mat_block_size - size of the blocks to use 1646 1647 Level: intermediate 1648 1649 Notes: 1650 Specify the preallocated storage with either nz or nnz (not both). 1651 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1652 allocation. For additional details, see the users manual chapter on 1653 matrices. 1654 1655 If the nnz parameter is given then the nz parameter is ignored 1656 1657 1658 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1659 @*/ 1660 PetscErrorCode MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 1661 { 1662 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 1663 1664 PetscFunctionBegin; 1665 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1666 if (f) { 1667 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1668 } 1669 PetscFunctionReturn(0); 1670 } 1671 1672 #undef __FUNCT__ 1673 #define __FUNCT__ "MatCreateSeqSBAIJ" 1674 /*@C 1675 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1676 compressed row) format. For good matrix assembly performance the 1677 user should preallocate the matrix storage by setting the parameter nz 1678 (or the array nnz). By setting these parameters accurately, performance 1679 during matrix assembly can be increased by more than a factor of 50. 1680 1681 Collective on MPI_Comm 1682 1683 Input Parameters: 1684 + comm - MPI communicator, set to PETSC_COMM_SELF 1685 . bs - size of block 1686 . m - number of rows, or number of columns 1687 . nz - number of block nonzeros per block row (same for all rows) 1688 - nnz - array containing the number of block nonzeros in the upper triangular plus 1689 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1690 1691 Output Parameter: 1692 . A - the symmetric matrix 1693 1694 Options Database Keys: 1695 . -mat_no_unroll - uses code that does not unroll the loops in the 1696 block calculations (much slower) 1697 . -mat_block_size - size of the blocks to use 1698 1699 Level: intermediate 1700 1701 Notes: 1702 1703 Specify the preallocated storage with either nz or nnz (not both). 1704 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1705 allocation. For additional details, see the users manual chapter on 1706 matrices. 1707 1708 If the nnz parameter is given then the nz parameter is ignored 1709 1710 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1711 @*/ 1712 PetscErrorCode MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1713 { 1714 PetscErrorCode ierr; 1715 1716 PetscFunctionBegin; 1717 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1718 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1719 ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1720 PetscFunctionReturn(0); 1721 } 1722 1723 #undef __FUNCT__ 1724 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1725 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1726 { 1727 Mat C; 1728 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1729 PetscErrorCode ierr; 1730 PetscInt i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1731 1732 PetscFunctionBegin; 1733 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1734 1735 *B = 0; 1736 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1737 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1738 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1739 c = (Mat_SeqSBAIJ*)C->data; 1740 1741 C->preallocated = PETSC_TRUE; 1742 C->factor = A->factor; 1743 c->row = 0; 1744 c->icol = 0; 1745 c->saved_values = 0; 1746 c->keepzeroedrows = a->keepzeroedrows; 1747 C->assembled = PETSC_TRUE; 1748 1749 C->M = A->M; 1750 C->N = A->N; 1751 C->bs = A->bs; 1752 c->bs2 = a->bs2; 1753 c->mbs = a->mbs; 1754 c->nbs = a->nbs; 1755 1756 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr); 1757 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr); 1758 for (i=0; i<mbs; i++) { 1759 c->imax[i] = a->imax[i]; 1760 c->ilen[i] = a->ilen[i]; 1761 } 1762 1763 /* allocate the matrix space */ 1764 c->singlemalloc = PETSC_TRUE; 1765 len = (mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)); 1766 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1767 c->j = (PetscInt*)(c->a + nz*bs2); 1768 c->i = c->j + nz; 1769 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1770 if (mbs > 0) { 1771 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1772 if (cpvalues == MAT_COPY_VALUES) { 1773 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1774 } else { 1775 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1776 } 1777 } 1778 1779 ierr = PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1780 c->sorted = a->sorted; 1781 c->roworiented = a->roworiented; 1782 c->nonew = a->nonew; 1783 1784 if (a->diag) { 1785 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 1786 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1787 for (i=0; i<mbs; i++) { 1788 c->diag[i] = a->diag[i]; 1789 } 1790 } else c->diag = 0; 1791 c->nz = a->nz; 1792 c->maxnz = a->maxnz; 1793 c->solve_work = 0; 1794 c->mult_work = 0; 1795 *B = C; 1796 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1797 PetscFunctionReturn(0); 1798 } 1799 1800 #undef __FUNCT__ 1801 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1802 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer,const MatType type,Mat *A) 1803 { 1804 Mat_SeqSBAIJ *a; 1805 Mat B; 1806 PetscErrorCode ierr; 1807 int fd; 1808 PetscMPIInt size; 1809 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 1810 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1811 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 1812 PetscInt *masked,nmask,tmp,bs2,ishift; 1813 PetscScalar *aa; 1814 MPI_Comm comm = ((PetscObject)viewer)->comm; 1815 1816 PetscFunctionBegin; 1817 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1818 bs2 = bs*bs; 1819 1820 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1821 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1822 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1823 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1824 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1825 M = header[1]; N = header[2]; nz = header[3]; 1826 1827 if (header[3] < 0) { 1828 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1829 } 1830 1831 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1832 1833 /* 1834 This code adds extra rows to make sure the number of rows is 1835 divisible by the blocksize 1836 */ 1837 mbs = M/bs; 1838 extra_rows = bs - M + bs*(mbs); 1839 if (extra_rows == bs) extra_rows = 0; 1840 else mbs++; 1841 if (extra_rows) { 1842 PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"); 1843 } 1844 1845 /* read in row lengths */ 1846 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1847 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1848 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1849 1850 /* read in column indices */ 1851 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1852 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1853 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1854 1855 /* loop over row lengths determining block row lengths */ 1856 ierr = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr); 1857 ierr = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1858 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 1859 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1860 masked = mask + mbs; 1861 rowcount = 0; nzcount = 0; 1862 for (i=0; i<mbs; i++) { 1863 nmask = 0; 1864 for (j=0; j<bs; j++) { 1865 kmax = rowlengths[rowcount]; 1866 for (k=0; k<kmax; k++) { 1867 tmp = jj[nzcount++]/bs; /* block col. index */ 1868 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1869 } 1870 rowcount++; 1871 } 1872 s_browlengths[i] += nmask; 1873 1874 /* zero out the mask elements we set */ 1875 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1876 } 1877 1878 /* create our matrix */ 1879 ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr); 1880 ierr = MatSetType(B,type);CHKERRQ(ierr); 1881 ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr); 1882 a = (Mat_SeqSBAIJ*)B->data; 1883 1884 /* set matrix "i" values */ 1885 a->i[0] = 0; 1886 for (i=1; i<= mbs; i++) { 1887 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1888 a->ilen[i-1] = s_browlengths[i-1]; 1889 } 1890 a->nz = a->i[mbs]; 1891 1892 /* read in nonzero values */ 1893 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1894 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1895 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1896 1897 /* set "a" and "j" values into matrix */ 1898 nzcount = 0; jcount = 0; 1899 for (i=0; i<mbs; i++) { 1900 nzcountb = nzcount; 1901 nmask = 0; 1902 for (j=0; j<bs; j++) { 1903 kmax = rowlengths[i*bs+j]; 1904 for (k=0; k<kmax; k++) { 1905 tmp = jj[nzcount++]/bs; /* block col. index */ 1906 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1907 } 1908 } 1909 /* sort the masked values */ 1910 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1911 1912 /* set "j" values into matrix */ 1913 maskcount = 1; 1914 for (j=0; j<nmask; j++) { 1915 a->j[jcount++] = masked[j]; 1916 mask[masked[j]] = maskcount++; 1917 } 1918 1919 /* set "a" values into matrix */ 1920 ishift = bs2*a->i[i]; 1921 for (j=0; j<bs; j++) { 1922 kmax = rowlengths[i*bs+j]; 1923 for (k=0; k<kmax; k++) { 1924 tmp = jj[nzcountb]/bs ; /* block col. index */ 1925 if (tmp >= i){ 1926 block = mask[tmp] - 1; 1927 point = jj[nzcountb] - bs*tmp; 1928 idx = ishift + bs2*block + j + bs*point; 1929 a->a[idx] = aa[nzcountb]; 1930 } 1931 nzcountb++; 1932 } 1933 } 1934 /* zero out the mask elements we set */ 1935 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1936 } 1937 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1938 1939 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1940 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1941 ierr = PetscFree(aa);CHKERRQ(ierr); 1942 ierr = PetscFree(jj);CHKERRQ(ierr); 1943 ierr = PetscFree(mask);CHKERRQ(ierr); 1944 1945 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1946 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1947 ierr = MatView_Private(B);CHKERRQ(ierr); 1948 *A = B; 1949 PetscFunctionReturn(0); 1950 } 1951 1952 #undef __FUNCT__ 1953 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1954 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1955 { 1956 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1957 MatScalar *aa=a->a,*v,*v1; 1958 PetscScalar *x,*b,*t,sum,d; 1959 PetscErrorCode ierr; 1960 PetscInt m=a->mbs,bs=A->bs,*ai=a->i,*aj=a->j; 1961 PetscInt nz,nz1,*vj,*vj1,i; 1962 1963 PetscFunctionBegin; 1964 its = its*lits; 1965 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1966 1967 if (bs > 1) 1968 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1969 1970 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1971 if (xx != bb) { 1972 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1973 } else { 1974 b = x; 1975 } 1976 1977 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1978 1979 if (flag & SOR_ZERO_INITIAL_GUESS) { 1980 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1981 for (i=0; i<m; i++) 1982 t[i] = b[i]; 1983 1984 for (i=0; i<m; i++){ 1985 d = *(aa + ai[i]); /* diag[i] */ 1986 v = aa + ai[i] + 1; 1987 vj = aj + ai[i] + 1; 1988 nz = ai[i+1] - ai[i] - 1; 1989 x[i] = omega*t[i]/d; 1990 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 1991 PetscLogFlops(2*nz-1); 1992 } 1993 } 1994 1995 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1996 for (i=0; i<m; i++) 1997 t[i] = b[i]; 1998 1999 for (i=0; i<m-1; i++){ /* update rhs */ 2000 v = aa + ai[i] + 1; 2001 vj = aj + ai[i] + 1; 2002 nz = ai[i+1] - ai[i] - 1; 2003 while (nz--) t[*vj++] -= x[i]*(*v++); 2004 PetscLogFlops(2*nz-1); 2005 } 2006 for (i=m-1; i>=0; i--){ 2007 d = *(aa + ai[i]); 2008 v = aa + ai[i] + 1; 2009 vj = aj + ai[i] + 1; 2010 nz = ai[i+1] - ai[i] - 1; 2011 sum = t[i]; 2012 while (nz--) sum -= x[*vj++]*(*v++); 2013 PetscLogFlops(2*nz-1); 2014 x[i] = (1-omega)*x[i] + omega*sum/d; 2015 } 2016 } 2017 its--; 2018 } 2019 2020 while (its--) { 2021 /* 2022 forward sweep: 2023 for i=0,...,m-1: 2024 sum[i] = (b[i] - U(i,:)x )/d[i]; 2025 x[i] = (1-omega)x[i] + omega*sum[i]; 2026 b = b - x[i]*U^T(i,:); 2027 2028 */ 2029 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2030 for (i=0; i<m; i++) 2031 t[i] = b[i]; 2032 2033 for (i=0; i<m; i++){ 2034 d = *(aa + ai[i]); /* diag[i] */ 2035 v = aa + ai[i] + 1; v1=v; 2036 vj = aj + ai[i] + 1; vj1=vj; 2037 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2038 sum = t[i]; 2039 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2040 x[i] = (1-omega)*x[i] + omega*sum/d; 2041 while (nz--) t[*vj++] -= x[i]*(*v++); 2042 PetscLogFlops(4*nz-2); 2043 } 2044 } 2045 2046 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2047 /* 2048 backward sweep: 2049 b = b - x[i]*U^T(i,:), i=0,...,n-2 2050 for i=m-1,...,0: 2051 sum[i] = (b[i] - U(i,:)x )/d[i]; 2052 x[i] = (1-omega)x[i] + omega*sum[i]; 2053 */ 2054 for (i=0; i<m; i++) 2055 t[i] = b[i]; 2056 2057 for (i=0; i<m-1; i++){ /* update rhs */ 2058 v = aa + ai[i] + 1; 2059 vj = aj + ai[i] + 1; 2060 nz = ai[i+1] - ai[i] - 1; 2061 while (nz--) t[*vj++] -= x[i]*(*v++); 2062 PetscLogFlops(2*nz-1); 2063 } 2064 for (i=m-1; i>=0; i--){ 2065 d = *(aa + ai[i]); 2066 v = aa + ai[i] + 1; 2067 vj = aj + ai[i] + 1; 2068 nz = ai[i+1] - ai[i] - 1; 2069 sum = t[i]; 2070 while (nz--) sum -= x[*vj++]*(*v++); 2071 PetscLogFlops(2*nz-1); 2072 x[i] = (1-omega)*x[i] + omega*sum/d; 2073 } 2074 } 2075 } 2076 2077 ierr = PetscFree(t);CHKERRQ(ierr); 2078 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2079 if (bb != xx) { 2080 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2081 } 2082 2083 PetscFunctionReturn(0); 2084 } 2085 2086 2087 2088 2089 2090 2091