1 2 /* 3 Defines the basic matrix operations for the SBAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 7 #include "src/inline/spops.h" 8 #include "src/mat/impls/sbaij/seq/sbaij.h" 9 10 #define CHUNKSIZE 10 11 12 /* 13 Checks for missing diagonals 14 */ 15 #undef __FUNCT__ 16 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 17 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A) 18 { 19 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 20 PetscErrorCode ierr; 21 PetscInt *diag,*jj = a->j,i; 22 23 PetscFunctionBegin; 24 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 25 diag = a->diag; 26 for (i=0; i<a->mbs; i++) { 27 if (jj[diag[i]] != i) SETERRQ1(PETSC_ERR_ARG_CORRUPT,"Matrix is missing diagonal number %D",i); 28 } 29 PetscFunctionReturn(0); 30 } 31 32 #undef __FUNCT__ 33 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 34 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A) 35 { 36 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 37 PetscErrorCode ierr; 38 PetscInt i,mbs = a->mbs; 39 40 PetscFunctionBegin; 41 if (a->diag) PetscFunctionReturn(0); 42 43 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 44 ierr = PetscLogObjectMemory(A,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 45 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 46 PetscFunctionReturn(0); 47 } 48 49 #undef __FUNCT__ 50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 51 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 52 { 53 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 54 PetscInt n = a->mbs,i; 55 56 PetscFunctionBegin; 57 *nn = n; 58 if (!ia) PetscFunctionReturn(0); 59 60 if (oshift == 1) { 61 /* temporarily add 1 to i and j indices */ 62 PetscInt nz = a->i[n]; 63 for (i=0; i<nz; i++) a->j[i]++; 64 for (i=0; i<n+1; i++) a->i[i]++; 65 *ia = a->i; *ja = a->j; 66 } else { 67 *ia = a->i; *ja = a->j; 68 } 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 74 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 75 { 76 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 77 PetscInt i,n = a->mbs; 78 79 PetscFunctionBegin; 80 if (!ia) PetscFunctionReturn(0); 81 82 if (oshift == 1) { 83 PetscInt nz = a->i[n]-1; 84 for (i=0; i<nz; i++) a->j[i]--; 85 for (i=0; i<n+1; i++) a->i[i]--; 86 } 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 92 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A) 93 { 94 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 95 PetscErrorCode ierr; 96 97 PetscFunctionBegin; 98 PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->m,a->nz); 99 ierr = PetscFree(a->a);CHKERRQ(ierr); 100 if (!a->singlemalloc) { 101 ierr = PetscFree(a->i);CHKERRQ(ierr); 102 ierr = PetscFree(a->j);CHKERRQ(ierr); 103 } 104 if (a->row) { 105 ierr = ISDestroy(a->row);CHKERRQ(ierr); 106 } 107 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 108 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 109 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 110 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 111 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 112 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 113 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 114 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 115 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 116 117 if (a->inew){ 118 ierr = PetscFree(a->inew);CHKERRQ(ierr); 119 a->inew = 0; 120 } 121 ierr = PetscFree(a);CHKERRQ(ierr); 122 123 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 124 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 125 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 126 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 127 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr); 128 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 129 PetscFunctionReturn(0); 130 } 131 132 #undef __FUNCT__ 133 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 134 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op) 135 { 136 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 137 PetscErrorCode ierr; 138 139 PetscFunctionBegin; 140 switch (op) { 141 case MAT_ROW_ORIENTED: 142 a->roworiented = PETSC_TRUE; 143 break; 144 case MAT_COLUMN_ORIENTED: 145 a->roworiented = PETSC_FALSE; 146 break; 147 case MAT_COLUMNS_SORTED: 148 a->sorted = PETSC_TRUE; 149 break; 150 case MAT_COLUMNS_UNSORTED: 151 a->sorted = PETSC_FALSE; 152 break; 153 case MAT_KEEP_ZEROED_ROWS: 154 a->keepzeroedrows = PETSC_TRUE; 155 break; 156 case MAT_NO_NEW_NONZERO_LOCATIONS: 157 a->nonew = 1; 158 break; 159 case MAT_NEW_NONZERO_LOCATION_ERR: 160 a->nonew = -1; 161 break; 162 case MAT_NEW_NONZERO_ALLOCATION_ERR: 163 a->nonew = -2; 164 break; 165 case MAT_YES_NEW_NONZERO_LOCATIONS: 166 a->nonew = 0; 167 break; 168 case MAT_ROWS_SORTED: 169 case MAT_ROWS_UNSORTED: 170 case MAT_YES_NEW_DIAGONALS: 171 case MAT_IGNORE_OFF_PROC_ENTRIES: 172 case MAT_USE_HASH_TABLE: 173 ierr = PetscLogInfo((A,"MatSetOption_SeqSBAIJ:Option ignored\n"));CHKERRQ(ierr); 174 break; 175 case MAT_NO_NEW_DIAGONALS: 176 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 177 case MAT_NOT_SYMMETRIC: 178 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 179 case MAT_HERMITIAN: 180 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 181 case MAT_SYMMETRIC: 182 case MAT_STRUCTURALLY_SYMMETRIC: 183 case MAT_NOT_HERMITIAN: 184 case MAT_SYMMETRY_ETERNAL: 185 case MAT_NOT_SYMMETRY_ETERNAL: 186 case MAT_IGNORE_LOWER_TRIANGULAR: 187 a->ignore_ltriangular = PETSC_TRUE; 188 break; 189 case MAT_ERROR_LOWER_TRIANGULAR: 190 a->ignore_ltriangular = PETSC_FALSE; 191 break; 192 default: 193 SETERRQ(PETSC_ERR_SUP,"unknown option"); 194 } 195 PetscFunctionReturn(0); 196 } 197 198 #undef __FUNCT__ 199 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 200 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v) 201 { 202 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 203 PetscErrorCode ierr; 204 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2; 205 MatScalar *aa,*aa_i; 206 PetscScalar *v_i; 207 208 PetscFunctionBegin; 209 bs = A->bs; 210 ai = a->i; 211 aj = a->j; 212 aa = a->a; 213 bs2 = a->bs2; 214 215 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row); 216 217 bn = row/bs; /* Block number */ 218 bp = row % bs; /* Block position */ 219 M = ai[bn+1] - ai[bn]; 220 *ncols = bs*M; 221 222 if (v) { 223 *v = 0; 224 if (*ncols) { 225 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 226 for (i=0; i<M; i++) { /* for each block in the block row */ 227 v_i = *v + i*bs; 228 aa_i = aa + bs2*(ai[bn] + i); 229 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 230 } 231 } 232 } 233 234 if (cols) { 235 *cols = 0; 236 if (*ncols) { 237 ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr); 238 for (i=0; i<M; i++) { /* for each block in the block row */ 239 cols_i = *cols + i*bs; 240 itmp = bs*aj[ai[bn] + i]; 241 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 242 } 243 } 244 } 245 246 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 247 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 248 #ifdef column_search 249 v_i = *v + M*bs; 250 cols_i = *cols + M*bs; 251 for (i=0; i<bn; i++){ /* for each block row */ 252 M = ai[i+1] - ai[i]; 253 for (j=0; j<M; j++){ 254 itmp = aj[ai[i] + j]; /* block column value */ 255 if (itmp == bn){ 256 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 257 for (k=0; k<bs; k++) { 258 *cols_i++ = i*bs+k; 259 *v_i++ = aa_i[k]; 260 } 261 *ncols += bs; 262 break; 263 } 264 } 265 } 266 #endif 267 PetscFunctionReturn(0); 268 } 269 270 #undef __FUNCT__ 271 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 272 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 273 { 274 PetscErrorCode ierr; 275 276 PetscFunctionBegin; 277 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 278 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 279 PetscFunctionReturn(0); 280 } 281 282 #undef __FUNCT__ 283 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 284 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B) 285 { 286 PetscErrorCode ierr; 287 PetscFunctionBegin; 288 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 289 PetscFunctionReturn(0); 290 } 291 292 #undef __FUNCT__ 293 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 294 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 295 { 296 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 297 PetscErrorCode ierr; 298 PetscInt i,j,bs = A->bs,k,l,bs2=a->bs2; 299 char *name; 300 PetscViewerFormat format; 301 302 PetscFunctionBegin; 303 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 304 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 305 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 306 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 307 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 308 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 309 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 310 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 311 for (i=0; i<a->mbs; i++) { 312 for (j=0; j<bs; j++) { 313 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 314 for (k=a->i[i]; k<a->i[i+1]; k++) { 315 for (l=0; l<bs; l++) { 316 #if defined(PETSC_USE_COMPLEX) 317 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 318 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 319 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 320 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 321 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 322 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 323 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 324 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 325 } 326 #else 327 if (a->a[bs2*k + l*bs + j] != 0.0) { 328 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 329 } 330 #endif 331 } 332 } 333 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 334 } 335 } 336 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 337 } else { 338 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 339 for (i=0; i<a->mbs; i++) { 340 for (j=0; j<bs; j++) { 341 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 342 for (k=a->i[i]; k<a->i[i+1]; k++) { 343 for (l=0; l<bs; l++) { 344 #if defined(PETSC_USE_COMPLEX) 345 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 346 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 347 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 348 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 349 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 350 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 351 } else { 352 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 353 } 354 #else 355 ierr = PetscViewerASCIIPrintf(viewer," %D %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 356 #endif 357 } 358 } 359 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 360 } 361 } 362 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 363 } 364 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 365 PetscFunctionReturn(0); 366 } 367 368 #undef __FUNCT__ 369 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 370 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 371 { 372 Mat A = (Mat) Aa; 373 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 374 PetscErrorCode ierr; 375 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2; 376 PetscMPIInt rank; 377 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 378 MatScalar *aa; 379 MPI_Comm comm; 380 PetscViewer viewer; 381 382 PetscFunctionBegin; 383 /* 384 This is nasty. If this is called from an originally parallel matrix 385 then all processes call this,but only the first has the matrix so the 386 rest should return immediately. 387 */ 388 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 389 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 390 if (rank) PetscFunctionReturn(0); 391 392 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 393 394 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 395 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 396 397 /* loop over matrix elements drawing boxes */ 398 color = PETSC_DRAW_BLUE; 399 for (i=0,row=0; i<mbs; i++,row+=bs) { 400 for (j=a->i[i]; j<a->i[i+1]; j++) { 401 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 402 x_l = a->j[j]*bs; x_r = x_l + 1.0; 403 aa = a->a + j*bs2; 404 for (k=0; k<bs; k++) { 405 for (l=0; l<bs; l++) { 406 if (PetscRealPart(*aa++) >= 0.) continue; 407 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 408 } 409 } 410 } 411 } 412 color = PETSC_DRAW_CYAN; 413 for (i=0,row=0; i<mbs; i++,row+=bs) { 414 for (j=a->i[i]; j<a->i[i+1]; j++) { 415 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 416 x_l = a->j[j]*bs; x_r = x_l + 1.0; 417 aa = a->a + j*bs2; 418 for (k=0; k<bs; k++) { 419 for (l=0; l<bs; l++) { 420 if (PetscRealPart(*aa++) != 0.) continue; 421 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 422 } 423 } 424 } 425 } 426 427 color = PETSC_DRAW_RED; 428 for (i=0,row=0; i<mbs; i++,row+=bs) { 429 for (j=a->i[i]; j<a->i[i+1]; j++) { 430 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 431 x_l = a->j[j]*bs; x_r = x_l + 1.0; 432 aa = a->a + j*bs2; 433 for (k=0; k<bs; k++) { 434 for (l=0; l<bs; l++) { 435 if (PetscRealPart(*aa++) <= 0.) continue; 436 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 437 } 438 } 439 } 440 } 441 PetscFunctionReturn(0); 442 } 443 444 #undef __FUNCT__ 445 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 446 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 447 { 448 PetscErrorCode ierr; 449 PetscReal xl,yl,xr,yr,w,h; 450 PetscDraw draw; 451 PetscTruth isnull; 452 453 PetscFunctionBegin; 454 455 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 456 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 457 458 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 459 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 460 xr += w; yr += h; xl = -w; yl = -h; 461 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 462 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 463 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 464 PetscFunctionReturn(0); 465 } 466 467 #undef __FUNCT__ 468 #define __FUNCT__ "MatView_SeqSBAIJ" 469 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 470 { 471 PetscErrorCode ierr; 472 PetscTruth iascii,isdraw; 473 474 PetscFunctionBegin; 475 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 476 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 477 if (iascii){ 478 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 479 } else if (isdraw) { 480 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 481 } else { 482 Mat B; 483 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 484 ierr = MatView(B,viewer);CHKERRQ(ierr); 485 ierr = MatDestroy(B);CHKERRQ(ierr); 486 } 487 PetscFunctionReturn(0); 488 } 489 490 491 #undef __FUNCT__ 492 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 493 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 494 { 495 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 496 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 497 PetscInt *ai = a->i,*ailen = a->ilen; 498 PetscInt brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2; 499 MatScalar *ap,*aa = a->a,zero = 0.0; 500 501 PetscFunctionBegin; 502 for (k=0; k<m; k++) { /* loop over rows */ 503 row = im[k]; brow = row/bs; 504 if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row); 505 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 506 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 507 nrow = ailen[brow]; 508 for (l=0; l<n; l++) { /* loop over columns */ 509 if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]); 510 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1); 511 col = in[l] ; 512 bcol = col/bs; 513 cidx = col%bs; 514 ridx = row%bs; 515 high = nrow; 516 low = 0; /* assume unsorted */ 517 while (high-low > 5) { 518 t = (low+high)/2; 519 if (rp[t] > bcol) high = t; 520 else low = t; 521 } 522 for (i=low; i<high; i++) { 523 if (rp[i] > bcol) break; 524 if (rp[i] == bcol) { 525 *v++ = ap[bs2*i+bs*cidx+ridx]; 526 goto finished; 527 } 528 } 529 *v++ = zero; 530 finished:; 531 } 532 } 533 PetscFunctionReturn(0); 534 } 535 536 537 #undef __FUNCT__ 538 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 539 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 540 { 541 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 542 PetscErrorCode ierr; 543 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 544 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 545 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval; 546 PetscTruth roworiented=a->roworiented; 547 const MatScalar *value = v; 548 MatScalar *ap,*aa = a->a,*bap; 549 550 PetscFunctionBegin; 551 if (roworiented) { 552 stepval = (n-1)*bs; 553 } else { 554 stepval = (m-1)*bs; 555 } 556 for (k=0; k<m; k++) { /* loop over added rows */ 557 row = im[k]; 558 if (row < 0) continue; 559 #if defined(PETSC_USE_DEBUG) 560 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 561 #endif 562 rp = aj + ai[row]; 563 ap = aa + bs2*ai[row]; 564 rmax = imax[row]; 565 nrow = ailen[row]; 566 low = 0; 567 high = nrow; 568 for (l=0; l<n; l++) { /* loop over added columns */ 569 if (in[l] < 0) continue; 570 col = in[l]; 571 #if defined(PETSC_USE_DEBUG) 572 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1); 573 #endif 574 if (col < row) continue; /* ignore lower triangular block */ 575 if (roworiented) { 576 value = v + k*(stepval+bs)*bs + l*bs; 577 } else { 578 value = v + l*(stepval+bs)*bs + k*bs; 579 } 580 if (col < lastcol) low = 0; else high = nrow; 581 lastcol = col; 582 while (high-low > 7) { 583 t = (low+high)/2; 584 if (rp[t] > col) high = t; 585 else low = t; 586 } 587 for (i=low; i<high; i++) { 588 if (rp[i] > col) break; 589 if (rp[i] == col) { 590 bap = ap + bs2*i; 591 if (roworiented) { 592 if (is == ADD_VALUES) { 593 for (ii=0; ii<bs; ii++,value+=stepval) { 594 for (jj=ii; jj<bs2; jj+=bs) { 595 bap[jj] += *value++; 596 } 597 } 598 } else { 599 for (ii=0; ii<bs; ii++,value+=stepval) { 600 for (jj=ii; jj<bs2; jj+=bs) { 601 bap[jj] = *value++; 602 } 603 } 604 } 605 } else { 606 if (is == ADD_VALUES) { 607 for (ii=0; ii<bs; ii++,value+=stepval) { 608 for (jj=0; jj<bs; jj++) { 609 *bap++ += *value++; 610 } 611 } 612 } else { 613 for (ii=0; ii<bs; ii++,value+=stepval) { 614 for (jj=0; jj<bs; jj++) { 615 *bap++ = *value++; 616 } 617 } 618 } 619 } 620 goto noinsert2; 621 } 622 } 623 if (nonew == 1) goto noinsert2; 624 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 625 if (nrow >= rmax) { 626 /* there is no extra room in row, therefore enlarge */ 627 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 628 MatScalar *new_a; 629 630 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 631 632 /* malloc new storage space */ 633 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); 634 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 635 new_j = (PetscInt*)(new_a + bs2*new_nz); 636 new_i = new_j + new_nz; 637 638 /* copy over old data into new slots */ 639 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 640 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 641 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 642 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 643 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 644 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 645 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 646 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 647 /* free up old matrix storage */ 648 ierr = PetscFree(a->a);CHKERRQ(ierr); 649 if (!a->singlemalloc) { 650 ierr = PetscFree(a->i);CHKERRQ(ierr); 651 ierr = PetscFree(a->j);CHKERRQ(ierr); 652 } 653 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 654 a->singlemalloc = PETSC_TRUE; 655 656 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 657 rmax = imax[row] = imax[row] + CHUNKSIZE; 658 ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); 659 a->maxnz += bs2*CHUNKSIZE; 660 a->reallocs++; 661 a->nz++; 662 } 663 N = nrow++ - 1; 664 /* shift up all the later entries in this row */ 665 for (ii=N; ii>=i; ii--) { 666 rp[ii+1] = rp[ii]; 667 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 668 } 669 if (N >= i) { 670 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 671 } 672 rp[i] = col; 673 bap = ap + bs2*i; 674 if (roworiented) { 675 for (ii=0; ii<bs; ii++,value+=stepval) { 676 for (jj=ii; jj<bs2; jj+=bs) { 677 bap[jj] = *value++; 678 } 679 } 680 } else { 681 for (ii=0; ii<bs; ii++,value+=stepval) { 682 for (jj=0; jj<bs; jj++) { 683 *bap++ = *value++; 684 } 685 } 686 } 687 noinsert2:; 688 low = i; 689 } 690 ailen[row] = nrow; 691 } 692 PetscFunctionReturn(0); 693 } 694 695 #undef __FUNCT__ 696 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 697 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 698 { 699 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 700 PetscErrorCode ierr; 701 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 702 PetscInt m = A->m,*ip,N,*ailen = a->ilen; 703 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 704 MatScalar *aa = a->a,*ap; 705 706 PetscFunctionBegin; 707 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 708 709 if (m) rmax = ailen[0]; 710 for (i=1; i<mbs; i++) { 711 /* move each row back by the amount of empty slots (fshift) before it*/ 712 fshift += imax[i-1] - ailen[i-1]; 713 rmax = PetscMax(rmax,ailen[i]); 714 if (fshift) { 715 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 716 N = ailen[i]; 717 for (j=0; j<N; j++) { 718 ip[j-fshift] = ip[j]; 719 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 720 } 721 } 722 ai[i] = ai[i-1] + ailen[i-1]; 723 } 724 if (mbs) { 725 fshift += imax[mbs-1] - ailen[mbs-1]; 726 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 727 } 728 /* reset ilen and imax for each row */ 729 for (i=0; i<mbs; i++) { 730 ailen[i] = imax[i] = ai[i+1] - ai[i]; 731 } 732 a->nz = ai[mbs]; 733 734 /* diagonals may have moved, reset it */ 735 if (a->diag) { 736 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 737 } 738 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n", 739 m,A->m,A->bs,fshift*bs2,a->nz*bs2));CHKERRQ(ierr); 740 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs));CHKERRQ(ierr); 741 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %D\n",rmax));CHKERRQ(ierr); 742 a->reallocs = 0; 743 A->info.nz_unneeded = (PetscReal)fshift*bs2; 744 PetscFunctionReturn(0); 745 } 746 747 /* 748 This function returns an array of flags which indicate the locations of contiguous 749 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 750 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 751 Assume: sizes should be long enough to hold all the values. 752 */ 753 #undef __FUNCT__ 754 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 755 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 756 { 757 PetscInt i,j,k,row; 758 PetscTruth flg; 759 760 PetscFunctionBegin; 761 for (i=0,j=0; i<n; j++) { 762 row = idx[i]; 763 if (row%bs!=0) { /* Not the begining of a block */ 764 sizes[j] = 1; 765 i++; 766 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 767 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 768 i++; 769 } else { /* Begining of the block, so check if the complete block exists */ 770 flg = PETSC_TRUE; 771 for (k=1; k<bs; k++) { 772 if (row+k != idx[i+k]) { /* break in the block */ 773 flg = PETSC_FALSE; 774 break; 775 } 776 } 777 if (flg) { /* No break in the bs */ 778 sizes[j] = bs; 779 i+= bs; 780 } else { 781 sizes[j] = 1; 782 i++; 783 } 784 } 785 } 786 *bs_max = j; 787 PetscFunctionReturn(0); 788 } 789 790 791 /* Only add/insert a(i,j) with i<=j (blocks). 792 Any a(i,j) with i>j input by user is ingored. 793 */ 794 795 #undef __FUNCT__ 796 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 797 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 798 { 799 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 800 PetscErrorCode ierr; 801 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 802 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 803 PetscInt *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol; 804 PetscInt ridx,cidx,bs2=a->bs2; 805 MatScalar *ap,value,*aa=a->a,*bap; 806 807 PetscFunctionBegin; 808 for (k=0; k<m; k++) { /* loop over added rows */ 809 row = im[k]; /* row number */ 810 brow = row/bs; /* block row number */ 811 if (row < 0) continue; 812 #if defined(PETSC_USE_DEBUG) 813 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 814 #endif 815 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 816 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 817 rmax = imax[brow]; /* maximum space allocated for this row */ 818 nrow = ailen[brow]; /* actual length of this row */ 819 low = 0; 820 821 for (l=0; l<n; l++) { /* loop over added columns */ 822 if (in[l] < 0) continue; 823 #if defined(PETSC_USE_DEBUG) 824 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->m-1); 825 #endif 826 col = in[l]; 827 bcol = col/bs; /* block col number */ 828 829 if (brow > bcol) { 830 if (a->ignore_ltriangular){ 831 continue; /* ignore lower triangular values */ 832 } else { 833 SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR)"); 834 } 835 } 836 837 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 838 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 839 /* element value a(k,l) */ 840 if (roworiented) { 841 value = v[l + k*n]; 842 } else { 843 value = v[k + l*m]; 844 } 845 846 /* move pointer bap to a(k,l) quickly and add/insert value */ 847 if (col < lastcol) low = 0; high = nrow; 848 lastcol = col; 849 while (high-low > 7) { 850 t = (low+high)/2; 851 if (rp[t] > bcol) high = t; 852 else low = t; 853 } 854 for (i=low; i<high; i++) { 855 if (rp[i] > bcol) break; 856 if (rp[i] == bcol) { 857 bap = ap + bs2*i + bs*cidx + ridx; 858 if (is == ADD_VALUES) *bap += value; 859 else *bap = value; 860 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 861 if (brow == bcol && ridx < cidx){ 862 bap = ap + bs2*i + bs*ridx + cidx; 863 if (is == ADD_VALUES) *bap += value; 864 else *bap = value; 865 } 866 goto noinsert1; 867 } 868 } 869 870 if (nonew == 1) goto noinsert1; 871 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 872 if (nrow >= rmax) { 873 /* there is no extra room in row, therefore enlarge */ 874 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 875 MatScalar *new_a; 876 877 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 878 879 /* Malloc new storage space */ 880 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); 881 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 882 new_j = (PetscInt*)(new_a + bs2*new_nz); 883 new_i = new_j + new_nz; 884 885 /* copy over old data into new slots */ 886 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 887 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 888 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 889 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 890 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 891 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 892 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 893 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 894 /* free up old matrix storage */ 895 ierr = PetscFree(a->a);CHKERRQ(ierr); 896 if (!a->singlemalloc) { 897 ierr = PetscFree(a->i);CHKERRQ(ierr); 898 ierr = PetscFree(a->j);CHKERRQ(ierr); 899 } 900 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 901 a->singlemalloc = PETSC_TRUE; 902 903 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 904 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 905 ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); 906 a->maxnz += bs2*CHUNKSIZE; 907 a->reallocs++; 908 a->nz++; 909 } 910 911 N = nrow++ - 1; 912 /* shift up all the later entries in this row */ 913 for (ii=N; ii>=i; ii--) { 914 rp[ii+1] = rp[ii]; 915 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 916 } 917 if (N>=i) { 918 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 919 } 920 rp[i] = bcol; 921 ap[bs2*i + bs*cidx + ridx] = value; 922 noinsert1:; 923 low = i; 924 } 925 } /* end of loop over added columns */ 926 ailen[brow] = nrow; 927 } /* end of loop over added rows */ 928 PetscFunctionReturn(0); 929 } 930 931 EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*); 932 EXTERN PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*); 933 EXTERN PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat,PetscInt,IS[],PetscInt); 934 EXTERN PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,PetscInt,MatReuse,Mat*); 935 EXTERN PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat*[]); 936 EXTERN PetscErrorCode MatScale_SeqSBAIJ(const PetscScalar*,Mat); 937 EXTERN PetscErrorCode MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 938 EXTERN PetscErrorCode MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 939 EXTERN PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat,Vec); 940 EXTERN PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 941 EXTERN PetscErrorCode MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 942 EXTERN PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat); 943 EXTERN PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat,Vec); 944 945 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 946 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 947 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 948 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 949 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 950 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 951 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 952 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 953 954 EXTERN PetscErrorCode MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs); 955 956 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 957 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 958 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 959 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 960 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 961 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 962 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 963 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 964 965 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,MatFactorInfo*,Mat*); 966 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,MatFactorInfo*,Mat*); 967 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,MatFactorInfo*,Mat*); 968 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,MatFactorInfo*,Mat*); 969 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,MatFactorInfo*,Mat*); 970 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,MatFactorInfo*,Mat*); 971 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,MatFactorInfo*,Mat*); 972 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,MatFactorInfo*,Mat*); 973 EXTERN PetscErrorCode MatGetInertia_SeqSBAIJ(Mat,PetscInt*,PetscInt*,PetscInt*); 974 975 EXTERN PetscErrorCode MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 976 EXTERN PetscErrorCode MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 977 EXTERN PetscErrorCode MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 978 EXTERN PetscErrorCode MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 979 EXTERN PetscErrorCode MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 980 EXTERN PetscErrorCode MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 981 EXTERN PetscErrorCode MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 982 EXTERN PetscErrorCode MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 983 984 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 985 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 986 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 987 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 988 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 989 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 990 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 991 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 992 993 #ifdef HAVE_MatICCFactor 994 /* modified from MatILUFactor_SeqSBAIJ, needs further work! */ 995 #undef __FUNCT__ 996 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 997 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 998 { 999 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 1000 Mat outA; 1001 PetscErrorCode ierr; 1002 PetscTruth row_identity,col_identity; 1003 1004 PetscFunctionBegin; 1005 outA = inA; 1006 inA->factor = FACTOR_CHOLESKY; 1007 1008 if (!a->diag) { 1009 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 1010 } 1011 /* 1012 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1013 for ILU(0) factorization with natural ordering 1014 */ 1015 switch (a->bs) { 1016 case 1: 1017 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1018 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1019 inA->ops->solves = MatSolves_SeqSBAIJ_1; 1020 ierr = PetscLoginfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"));CHKERRQ(ierr); 1021 case 2: 1022 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1023 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1024 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1025 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"));CHKERRQ(ierr); 1026 break; 1027 case 3: 1028 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1029 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1030 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1031 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"));CHKERRQ(ierr); 1032 break; 1033 case 4: 1034 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1035 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1036 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1037 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"));CHKERRQ(ierr); 1038 break; 1039 case 5: 1040 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1041 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1042 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1043 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"));CHKERRQ(ierr); 1044 break; 1045 case 6: 1046 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1047 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1048 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1049 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"));CHKERRQ(ierr); 1050 break; 1051 case 7: 1052 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1053 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1054 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1055 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"));CHKERRQ(ierr); 1056 break; 1057 default: 1058 a->row = row; 1059 a->icol = col; 1060 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1061 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1062 1063 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1064 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1065 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 1066 1067 if (!a->solve_work) { 1068 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1069 ierr = PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1070 } 1071 } 1072 1073 ierr = MatCholeskyFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1074 PetscFunctionReturn(0); 1075 } 1076 #endif 1077 1078 #undef __FUNCT__ 1079 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1080 PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat A) 1081 { 1082 static PetscTruth called = PETSC_FALSE; 1083 MPI_Comm comm = A->comm; 1084 PetscErrorCode ierr; 1085 1086 PetscFunctionBegin; 1087 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1088 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1089 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1090 ierr = (*PetscHelpPrintf)(comm," -mat_ignore_ltriangular: Ignore lower triangular values set by user\n");CHKERRQ(ierr); 1091 PetscFunctionReturn(0); 1092 } 1093 1094 EXTERN_C_BEGIN 1095 #undef __FUNCT__ 1096 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1097 PetscErrorCode MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices) 1098 { 1099 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1100 PetscInt i,nz,n; 1101 1102 PetscFunctionBegin; 1103 nz = baij->maxnz; 1104 n = mat->n; 1105 for (i=0; i<nz; i++) { 1106 baij->j[i] = indices[i]; 1107 } 1108 baij->nz = nz; 1109 for (i=0; i<n; i++) { 1110 baij->ilen[i] = baij->imax[i]; 1111 } 1112 PetscFunctionReturn(0); 1113 } 1114 EXTERN_C_END 1115 1116 #undef __FUNCT__ 1117 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1118 /*@ 1119 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1120 in the matrix. 1121 1122 Input Parameters: 1123 + mat - the SeqSBAIJ matrix 1124 - indices - the column indices 1125 1126 Level: advanced 1127 1128 Notes: 1129 This can be called if you have precomputed the nonzero structure of the 1130 matrix and want to provide it to the matrix object to improve the performance 1131 of the MatSetValues() operation. 1132 1133 You MUST have set the correct numbers of nonzeros per row in the call to 1134 MatCreateSeqSBAIJ(), and the columns indices MUST be sorted. 1135 1136 MUST be called before any calls to MatSetValues() 1137 1138 .seealso: MatCreateSeqSBAIJ 1139 @*/ 1140 PetscErrorCode MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1141 { 1142 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1143 1144 PetscFunctionBegin; 1145 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1146 PetscValidPointer(indices,2); 1147 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1148 if (f) { 1149 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1150 } else { 1151 SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices"); 1152 } 1153 PetscFunctionReturn(0); 1154 } 1155 1156 #undef __FUNCT__ 1157 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1158 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A) 1159 { 1160 PetscErrorCode ierr; 1161 1162 PetscFunctionBegin; 1163 ierr = MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1164 PetscFunctionReturn(0); 1165 } 1166 1167 #undef __FUNCT__ 1168 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1169 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1170 { 1171 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1172 PetscFunctionBegin; 1173 *array = a->a; 1174 PetscFunctionReturn(0); 1175 } 1176 1177 #undef __FUNCT__ 1178 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1179 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1180 { 1181 PetscFunctionBegin; 1182 PetscFunctionReturn(0); 1183 } 1184 1185 #include "petscblaslapack.h" 1186 #undef __FUNCT__ 1187 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1188 PetscErrorCode MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1189 { 1190 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1191 PetscErrorCode ierr; 1192 PetscInt i,bs=Y->bs,bs2,j; 1193 PetscBLASInt bnz = (PetscBLASInt)x->nz,one = 1; 1194 1195 PetscFunctionBegin; 1196 if (str == SAME_NONZERO_PATTERN) { 1197 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1198 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1199 if (y->xtoy && y->XtoY != X) { 1200 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1201 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1202 } 1203 if (!y->xtoy) { /* get xtoy */ 1204 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1205 y->XtoY = X; 1206 } 1207 bs2 = bs*bs; 1208 for (i=0; i<x->nz; i++) { 1209 j = 0; 1210 while (j < bs2){ 1211 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1212 j++; 1213 } 1214 } 1215 ierr = PetscLogInfo((0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr); 1216 } else { 1217 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1218 } 1219 PetscFunctionReturn(0); 1220 } 1221 1222 #undef __FUNCT__ 1223 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1224 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1225 { 1226 PetscFunctionBegin; 1227 *flg = PETSC_TRUE; 1228 PetscFunctionReturn(0); 1229 } 1230 1231 #undef __FUNCT__ 1232 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1233 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1234 { 1235 PetscFunctionBegin; 1236 *flg = PETSC_TRUE; 1237 PetscFunctionReturn(0); 1238 } 1239 1240 #undef __FUNCT__ 1241 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1242 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg) 1243 { 1244 PetscFunctionBegin; 1245 *flg = PETSC_FALSE; 1246 PetscFunctionReturn(0); 1247 } 1248 1249 /* -------------------------------------------------------------------*/ 1250 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1251 MatGetRow_SeqSBAIJ, 1252 MatRestoreRow_SeqSBAIJ, 1253 MatMult_SeqSBAIJ_N, 1254 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1255 MatMult_SeqSBAIJ_N, 1256 MatMultAdd_SeqSBAIJ_N, 1257 MatSolve_SeqSBAIJ_N, 1258 0, 1259 0, 1260 /*10*/ 0, 1261 0, 1262 MatCholeskyFactor_SeqSBAIJ, 1263 MatRelax_SeqSBAIJ, 1264 MatTranspose_SeqSBAIJ, 1265 /*15*/ MatGetInfo_SeqSBAIJ, 1266 MatEqual_SeqSBAIJ, 1267 MatGetDiagonal_SeqSBAIJ, 1268 MatDiagonalScale_SeqSBAIJ, 1269 MatNorm_SeqSBAIJ, 1270 /*20*/ 0, 1271 MatAssemblyEnd_SeqSBAIJ, 1272 0, 1273 MatSetOption_SeqSBAIJ, 1274 MatZeroEntries_SeqSBAIJ, 1275 /*25*/ 0, 1276 0, 1277 0, 1278 MatCholeskyFactorSymbolic_SeqSBAIJ, 1279 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1280 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1281 0, 1282 MatICCFactorSymbolic_SeqSBAIJ, 1283 MatGetArray_SeqSBAIJ, 1284 MatRestoreArray_SeqSBAIJ, 1285 /*35*/ MatDuplicate_SeqSBAIJ, 1286 0, 1287 0, 1288 0, 1289 0, 1290 /*40*/ MatAXPY_SeqSBAIJ, 1291 MatGetSubMatrices_SeqSBAIJ, 1292 MatIncreaseOverlap_SeqSBAIJ, 1293 MatGetValues_SeqSBAIJ, 1294 0, 1295 /*45*/ MatPrintHelp_SeqSBAIJ, 1296 MatScale_SeqSBAIJ, 1297 0, 1298 0, 1299 0, 1300 /*50*/ 0, 1301 MatGetRowIJ_SeqSBAIJ, 1302 MatRestoreRowIJ_SeqSBAIJ, 1303 0, 1304 0, 1305 /*55*/ 0, 1306 0, 1307 0, 1308 0, 1309 MatSetValuesBlocked_SeqSBAIJ, 1310 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1311 0, 1312 0, 1313 MatGetPetscMaps_Petsc, 1314 0, 1315 /*65*/ 0, 1316 0, 1317 0, 1318 0, 1319 0, 1320 /*70*/ MatGetRowMax_SeqSBAIJ, 1321 0, 1322 0, 1323 0, 1324 0, 1325 /*75*/ 0, 1326 0, 1327 0, 1328 0, 1329 0, 1330 /*80*/ 0, 1331 0, 1332 0, 1333 #if !defined(PETSC_USE_COMPLEX) 1334 MatGetInertia_SeqSBAIJ, 1335 #else 1336 0, 1337 #endif 1338 MatLoad_SeqSBAIJ, 1339 /*85*/ MatIsSymmetric_SeqSBAIJ, 1340 MatIsHermitian_SeqSBAIJ, 1341 MatIsStructurallySymmetric_SeqSBAIJ, 1342 0, 1343 0, 1344 /*90*/ 0, 1345 0, 1346 0, 1347 0, 1348 0, 1349 /*95*/ 0, 1350 0, 1351 0, 1352 0}; 1353 1354 EXTERN_C_BEGIN 1355 #undef __FUNCT__ 1356 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1357 PetscErrorCode MatStoreValues_SeqSBAIJ(Mat mat) 1358 { 1359 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1360 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 1361 PetscErrorCode ierr; 1362 1363 PetscFunctionBegin; 1364 if (aij->nonew != 1) { 1365 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1366 } 1367 1368 /* allocate space for values if not already there */ 1369 if (!aij->saved_values) { 1370 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1371 } 1372 1373 /* copy values over */ 1374 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1375 PetscFunctionReturn(0); 1376 } 1377 EXTERN_C_END 1378 1379 EXTERN_C_BEGIN 1380 #undef __FUNCT__ 1381 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1382 PetscErrorCode MatRetrieveValues_SeqSBAIJ(Mat mat) 1383 { 1384 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1385 PetscErrorCode ierr; 1386 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 1387 1388 PetscFunctionBegin; 1389 if (aij->nonew != 1) { 1390 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1391 } 1392 if (!aij->saved_values) { 1393 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1394 } 1395 1396 /* copy values over */ 1397 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1398 PetscFunctionReturn(0); 1399 } 1400 EXTERN_C_END 1401 1402 EXTERN_C_BEGIN 1403 #undef __FUNCT__ 1404 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1405 PetscErrorCode MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 1406 { 1407 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1408 PetscErrorCode ierr; 1409 PetscInt i,len,mbs,bs2; 1410 PetscTruth flg; 1411 1412 PetscFunctionBegin; 1413 B->preallocated = PETSC_TRUE; 1414 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1415 mbs = B->m/bs; 1416 bs2 = bs*bs; 1417 1418 if (mbs*bs != B->m) { 1419 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1420 } 1421 1422 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1423 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 1424 if (nnz) { 1425 for (i=0; i<mbs; i++) { 1426 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 1427 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs); 1428 } 1429 } 1430 1431 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1432 if (!flg) { 1433 switch (bs) { 1434 case 1: 1435 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1436 B->ops->solve = MatSolve_SeqSBAIJ_1; 1437 B->ops->solves = MatSolves_SeqSBAIJ_1; 1438 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1439 B->ops->mult = MatMult_SeqSBAIJ_1; 1440 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1441 break; 1442 case 2: 1443 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1444 B->ops->solve = MatSolve_SeqSBAIJ_2; 1445 B->ops->solvetranspose = MatSolve_SeqSBAIJ_2; 1446 B->ops->mult = MatMult_SeqSBAIJ_2; 1447 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1448 break; 1449 case 3: 1450 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1451 B->ops->solve = MatSolve_SeqSBAIJ_3; 1452 B->ops->solvetranspose = MatSolve_SeqSBAIJ_3; 1453 B->ops->mult = MatMult_SeqSBAIJ_3; 1454 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1455 break; 1456 case 4: 1457 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1458 B->ops->solve = MatSolve_SeqSBAIJ_4; 1459 B->ops->solvetranspose = MatSolve_SeqSBAIJ_4; 1460 B->ops->mult = MatMult_SeqSBAIJ_4; 1461 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1462 break; 1463 case 5: 1464 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1465 B->ops->solve = MatSolve_SeqSBAIJ_5; 1466 B->ops->solvetranspose = MatSolve_SeqSBAIJ_5; 1467 B->ops->mult = MatMult_SeqSBAIJ_5; 1468 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1469 break; 1470 case 6: 1471 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1472 B->ops->solve = MatSolve_SeqSBAIJ_6; 1473 B->ops->solvetranspose = MatSolve_SeqSBAIJ_6; 1474 B->ops->mult = MatMult_SeqSBAIJ_6; 1475 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1476 break; 1477 case 7: 1478 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1479 B->ops->solve = MatSolve_SeqSBAIJ_7; 1480 B->ops->solvetranspose = MatSolve_SeqSBAIJ_7; 1481 B->ops->mult = MatMult_SeqSBAIJ_7; 1482 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1483 break; 1484 } 1485 } 1486 1487 b->mbs = mbs; 1488 b->nbs = mbs; 1489 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->imax);CHKERRQ(ierr); 1490 if (!nnz) { 1491 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1492 else if (nz <= 0) nz = 1; 1493 for (i=0; i<mbs; i++) { 1494 b->imax[i] = nz; 1495 } 1496 nz = nz*mbs; /* total nz */ 1497 } else { 1498 nz = 0; 1499 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1500 } 1501 /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1502 1503 /* allocate the matrix space */ 1504 len = nz*sizeof(PetscInt) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(PetscInt); 1505 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1506 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1507 b->j = (PetscInt*)(b->a + nz*bs2); 1508 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1509 b->i = b->j + nz; 1510 b->singlemalloc = PETSC_TRUE; 1511 1512 /* pointer to beginning of each row */ 1513 b->i[0] = 0; 1514 for (i=1; i<mbs+1; i++) { 1515 b->i[i] = b->i[i-1] + b->imax[i-1]; 1516 } 1517 1518 /* b->ilen will count nonzeros in each block row so far. */ 1519 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->ilen);CHKERRQ(ierr); 1520 ierr = PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1521 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1522 1523 B->bs = bs; 1524 b->bs2 = bs2; 1525 b->nz = 0; 1526 b->maxnz = nz*bs2; 1527 1528 b->inew = 0; 1529 b->jnew = 0; 1530 b->anew = 0; 1531 b->a2anew = 0; 1532 b->permute = PETSC_FALSE; 1533 PetscFunctionReturn(0); 1534 } 1535 EXTERN_C_END 1536 1537 EXTERN_C_BEGIN 1538 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*); 1539 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqBAIJ(Mat,const MatType,MatReuse,Mat*); 1540 EXTERN_C_END 1541 1542 /*MC 1543 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1544 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1545 1546 Options Database Keys: 1547 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1548 1549 Level: beginner 1550 1551 .seealso: MatCreateSeqSBAIJ 1552 M*/ 1553 1554 EXTERN_C_BEGIN 1555 #undef __FUNCT__ 1556 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1557 PetscErrorCode MatCreate_SeqSBAIJ(Mat B) 1558 { 1559 Mat_SeqSBAIJ *b; 1560 PetscErrorCode ierr; 1561 PetscMPIInt size; 1562 PetscTruth flg; 1563 1564 PetscFunctionBegin; 1565 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1566 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1567 B->m = B->M = PetscMax(B->m,B->M); 1568 B->n = B->N = PetscMax(B->n,B->N); 1569 1570 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1571 B->data = (void*)b; 1572 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1573 B->ops->destroy = MatDestroy_SeqSBAIJ; 1574 B->ops->view = MatView_SeqSBAIJ; 1575 B->factor = 0; 1576 B->mapping = 0; 1577 b->row = 0; 1578 b->icol = 0; 1579 b->reallocs = 0; 1580 b->saved_values = 0; 1581 1582 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1583 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1584 1585 b->sorted = PETSC_FALSE; 1586 b->roworiented = PETSC_TRUE; 1587 b->nonew = 0; 1588 b->diag = 0; 1589 b->solve_work = 0; 1590 b->mult_work = 0; 1591 B->spptr = 0; 1592 b->keepzeroedrows = PETSC_FALSE; 1593 b->xtoy = 0; 1594 b->XtoY = 0; 1595 1596 b->inew = 0; 1597 b->jnew = 0; 1598 b->anew = 0; 1599 b->a2anew = 0; 1600 b->permute = PETSC_FALSE; 1601 1602 b->ignore_ltriangular = PETSC_FALSE; 1603 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_ignore_lower_triangular",&flg);CHKERRQ(ierr); 1604 if (flg) b->ignore_ltriangular = PETSC_TRUE; 1605 1606 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1607 "MatStoreValues_SeqSBAIJ", 1608 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1609 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1610 "MatRetrieveValues_SeqSBAIJ", 1611 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1612 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1613 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1614 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1615 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C", 1616 "MatConvert_SeqSBAIJ_SeqAIJ", 1617 MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr); 1618 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C", 1619 "MatConvert_SeqSBAIJ_SeqBAIJ", 1620 MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr); 1621 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1622 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1623 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1624 1625 B->symmetric = PETSC_TRUE; 1626 B->structurally_symmetric = PETSC_TRUE; 1627 B->symmetric_set = PETSC_TRUE; 1628 B->structurally_symmetric_set = PETSC_TRUE; 1629 PetscFunctionReturn(0); 1630 } 1631 EXTERN_C_END 1632 1633 #undef __FUNCT__ 1634 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1635 /*@C 1636 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1637 compressed row) format. For good matrix assembly performance the 1638 user should preallocate the matrix storage by setting the parameter nz 1639 (or the array nnz). By setting these parameters accurately, performance 1640 during matrix assembly can be increased by more than a factor of 50. 1641 1642 Collective on Mat 1643 1644 Input Parameters: 1645 + A - the symmetric matrix 1646 . bs - size of block 1647 . nz - number of block nonzeros per block row (same for all rows) 1648 - nnz - array containing the number of block nonzeros in the upper triangular plus 1649 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1650 1651 Options Database Keys: 1652 . -mat_no_unroll - uses code that does not unroll the loops in the 1653 block calculations (much slower) 1654 . -mat_block_size - size of the blocks to use 1655 1656 Level: intermediate 1657 1658 Notes: 1659 Specify the preallocated storage with either nz or nnz (not both). 1660 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1661 allocation. For additional details, see the users manual chapter on 1662 matrices. 1663 1664 If the nnz parameter is given then the nz parameter is ignored 1665 1666 1667 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1668 @*/ 1669 PetscErrorCode MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 1670 { 1671 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 1672 1673 PetscFunctionBegin; 1674 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1675 if (f) { 1676 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1677 } 1678 PetscFunctionReturn(0); 1679 } 1680 1681 #undef __FUNCT__ 1682 #define __FUNCT__ "MatCreateSeqSBAIJ" 1683 /*@C 1684 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1685 compressed row) format. For good matrix assembly performance the 1686 user should preallocate the matrix storage by setting the parameter nz 1687 (or the array nnz). By setting these parameters accurately, performance 1688 during matrix assembly can be increased by more than a factor of 50. 1689 1690 Collective on MPI_Comm 1691 1692 Input Parameters: 1693 + comm - MPI communicator, set to PETSC_COMM_SELF 1694 . bs - size of block 1695 . m - number of rows, or number of columns 1696 . nz - number of block nonzeros per block row (same for all rows) 1697 - nnz - array containing the number of block nonzeros in the upper triangular plus 1698 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1699 1700 Output Parameter: 1701 . A - the symmetric matrix 1702 1703 Options Database Keys: 1704 . -mat_no_unroll - uses code that does not unroll the loops in the 1705 block calculations (much slower) 1706 . -mat_block_size - size of the blocks to use 1707 1708 Level: intermediate 1709 1710 Notes: 1711 1712 Specify the preallocated storage with either nz or nnz (not both). 1713 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1714 allocation. For additional details, see the users manual chapter on 1715 matrices. 1716 1717 If the nnz parameter is given then the nz parameter is ignored 1718 1719 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1720 @*/ 1721 PetscErrorCode MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1722 { 1723 PetscErrorCode ierr; 1724 1725 PetscFunctionBegin; 1726 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1727 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1728 ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1729 PetscFunctionReturn(0); 1730 } 1731 1732 #undef __FUNCT__ 1733 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1734 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1735 { 1736 Mat C; 1737 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1738 PetscErrorCode ierr; 1739 PetscInt i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1740 1741 PetscFunctionBegin; 1742 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1743 1744 *B = 0; 1745 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1746 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1747 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1748 c = (Mat_SeqSBAIJ*)C->data; 1749 1750 C->preallocated = PETSC_TRUE; 1751 C->factor = A->factor; 1752 c->row = 0; 1753 c->icol = 0; 1754 c->saved_values = 0; 1755 c->keepzeroedrows = a->keepzeroedrows; 1756 C->assembled = PETSC_TRUE; 1757 1758 C->M = A->M; 1759 C->N = A->N; 1760 C->bs = A->bs; 1761 c->bs2 = a->bs2; 1762 c->mbs = a->mbs; 1763 c->nbs = a->nbs; 1764 1765 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr); 1766 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr); 1767 for (i=0; i<mbs; i++) { 1768 c->imax[i] = a->imax[i]; 1769 c->ilen[i] = a->ilen[i]; 1770 } 1771 1772 /* allocate the matrix space */ 1773 c->singlemalloc = PETSC_TRUE; 1774 len = (mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)); 1775 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1776 c->j = (PetscInt*)(c->a + nz*bs2); 1777 c->i = c->j + nz; 1778 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1779 if (mbs > 0) { 1780 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1781 if (cpvalues == MAT_COPY_VALUES) { 1782 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1783 } else { 1784 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1785 } 1786 } 1787 1788 ierr = PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1789 c->sorted = a->sorted; 1790 c->roworiented = a->roworiented; 1791 c->nonew = a->nonew; 1792 1793 if (a->diag) { 1794 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 1795 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1796 for (i=0; i<mbs; i++) { 1797 c->diag[i] = a->diag[i]; 1798 } 1799 } else c->diag = 0; 1800 c->nz = a->nz; 1801 c->maxnz = a->maxnz; 1802 c->solve_work = 0; 1803 c->mult_work = 0; 1804 *B = C; 1805 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1806 PetscFunctionReturn(0); 1807 } 1808 1809 #undef __FUNCT__ 1810 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1811 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer,const MatType type,Mat *A) 1812 { 1813 Mat_SeqSBAIJ *a; 1814 Mat B; 1815 PetscErrorCode ierr; 1816 int fd; 1817 PetscMPIInt size; 1818 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 1819 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1820 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 1821 PetscInt *masked,nmask,tmp,bs2,ishift; 1822 PetscScalar *aa; 1823 MPI_Comm comm = ((PetscObject)viewer)->comm; 1824 1825 PetscFunctionBegin; 1826 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1827 bs2 = bs*bs; 1828 1829 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1830 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1831 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1832 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1833 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1834 M = header[1]; N = header[2]; nz = header[3]; 1835 1836 if (header[3] < 0) { 1837 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1838 } 1839 1840 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1841 1842 /* 1843 This code adds extra rows to make sure the number of rows is 1844 divisible by the blocksize 1845 */ 1846 mbs = M/bs; 1847 extra_rows = bs - M + bs*(mbs); 1848 if (extra_rows == bs) extra_rows = 0; 1849 else mbs++; 1850 if (extra_rows) { 1851 ierr = PetscLogInfo((0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr); 1852 } 1853 1854 /* read in row lengths */ 1855 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1856 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1857 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1858 1859 /* read in column indices */ 1860 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1861 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1862 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1863 1864 /* loop over row lengths determining block row lengths */ 1865 ierr = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr); 1866 ierr = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1867 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 1868 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1869 masked = mask + mbs; 1870 rowcount = 0; nzcount = 0; 1871 for (i=0; i<mbs; i++) { 1872 nmask = 0; 1873 for (j=0; j<bs; j++) { 1874 kmax = rowlengths[rowcount]; 1875 for (k=0; k<kmax; k++) { 1876 tmp = jj[nzcount++]/bs; /* block col. index */ 1877 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1878 } 1879 rowcount++; 1880 } 1881 s_browlengths[i] += nmask; 1882 1883 /* zero out the mask elements we set */ 1884 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1885 } 1886 1887 /* create our matrix */ 1888 ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr); 1889 ierr = MatSetType(B,type);CHKERRQ(ierr); 1890 ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr); 1891 a = (Mat_SeqSBAIJ*)B->data; 1892 1893 /* set matrix "i" values */ 1894 a->i[0] = 0; 1895 for (i=1; i<= mbs; i++) { 1896 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1897 a->ilen[i-1] = s_browlengths[i-1]; 1898 } 1899 a->nz = a->i[mbs]; 1900 1901 /* read in nonzero values */ 1902 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1903 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1904 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1905 1906 /* set "a" and "j" values into matrix */ 1907 nzcount = 0; jcount = 0; 1908 for (i=0; i<mbs; i++) { 1909 nzcountb = nzcount; 1910 nmask = 0; 1911 for (j=0; j<bs; j++) { 1912 kmax = rowlengths[i*bs+j]; 1913 for (k=0; k<kmax; k++) { 1914 tmp = jj[nzcount++]/bs; /* block col. index */ 1915 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1916 } 1917 } 1918 /* sort the masked values */ 1919 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1920 1921 /* set "j" values into matrix */ 1922 maskcount = 1; 1923 for (j=0; j<nmask; j++) { 1924 a->j[jcount++] = masked[j]; 1925 mask[masked[j]] = maskcount++; 1926 } 1927 1928 /* set "a" values into matrix */ 1929 ishift = bs2*a->i[i]; 1930 for (j=0; j<bs; j++) { 1931 kmax = rowlengths[i*bs+j]; 1932 for (k=0; k<kmax; k++) { 1933 tmp = jj[nzcountb]/bs ; /* block col. index */ 1934 if (tmp >= i){ 1935 block = mask[tmp] - 1; 1936 point = jj[nzcountb] - bs*tmp; 1937 idx = ishift + bs2*block + j + bs*point; 1938 a->a[idx] = aa[nzcountb]; 1939 } 1940 nzcountb++; 1941 } 1942 } 1943 /* zero out the mask elements we set */ 1944 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1945 } 1946 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1947 1948 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1949 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1950 ierr = PetscFree(aa);CHKERRQ(ierr); 1951 ierr = PetscFree(jj);CHKERRQ(ierr); 1952 ierr = PetscFree(mask);CHKERRQ(ierr); 1953 1954 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1955 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1956 ierr = MatView_Private(B);CHKERRQ(ierr); 1957 *A = B; 1958 PetscFunctionReturn(0); 1959 } 1960 1961 #undef __FUNCT__ 1962 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1963 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1964 { 1965 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1966 MatScalar *aa=a->a,*v,*v1; 1967 PetscScalar *x,*b,*t,sum,d; 1968 PetscErrorCode ierr; 1969 PetscInt m=a->mbs,bs=A->bs,*ai=a->i,*aj=a->j; 1970 PetscInt nz,nz1,*vj,*vj1,i; 1971 1972 PetscFunctionBegin; 1973 its = its*lits; 1974 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1975 1976 if (bs > 1) 1977 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1978 1979 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1980 if (xx != bb) { 1981 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1982 } else { 1983 b = x; 1984 } 1985 1986 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1987 1988 if (flag & SOR_ZERO_INITIAL_GUESS) { 1989 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1990 for (i=0; i<m; i++) 1991 t[i] = b[i]; 1992 1993 for (i=0; i<m; i++){ 1994 d = *(aa + ai[i]); /* diag[i] */ 1995 v = aa + ai[i] + 1; 1996 vj = aj + ai[i] + 1; 1997 nz = ai[i+1] - ai[i] - 1; 1998 x[i] = omega*t[i]/d; 1999 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 2000 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2001 } 2002 } 2003 2004 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2005 for (i=0; i<m; i++) 2006 t[i] = b[i]; 2007 2008 for (i=0; i<m-1; i++){ /* update rhs */ 2009 v = aa + ai[i] + 1; 2010 vj = aj + ai[i] + 1; 2011 nz = ai[i+1] - ai[i] - 1; 2012 while (nz--) t[*vj++] -= x[i]*(*v++); 2013 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2014 } 2015 for (i=m-1; i>=0; i--){ 2016 d = *(aa + ai[i]); 2017 v = aa + ai[i] + 1; 2018 vj = aj + ai[i] + 1; 2019 nz = ai[i+1] - ai[i] - 1; 2020 sum = t[i]; 2021 while (nz--) sum -= x[*vj++]*(*v++); 2022 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2023 x[i] = (1-omega)*x[i] + omega*sum/d; 2024 } 2025 } 2026 its--; 2027 } 2028 2029 while (its--) { 2030 /* 2031 forward sweep: 2032 for i=0,...,m-1: 2033 sum[i] = (b[i] - U(i,:)x )/d[i]; 2034 x[i] = (1-omega)x[i] + omega*sum[i]; 2035 b = b - x[i]*U^T(i,:); 2036 2037 */ 2038 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2039 for (i=0; i<m; i++) 2040 t[i] = b[i]; 2041 2042 for (i=0; i<m; i++){ 2043 d = *(aa + ai[i]); /* diag[i] */ 2044 v = aa + ai[i] + 1; v1=v; 2045 vj = aj + ai[i] + 1; vj1=vj; 2046 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2047 sum = t[i]; 2048 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2049 x[i] = (1-omega)*x[i] + omega*sum/d; 2050 while (nz--) t[*vj++] -= x[i]*(*v++); 2051 ierr = PetscLogFlops(4*nz-2);CHKERRQ(ierr); 2052 } 2053 } 2054 2055 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2056 /* 2057 backward sweep: 2058 b = b - x[i]*U^T(i,:), i=0,...,n-2 2059 for i=m-1,...,0: 2060 sum[i] = (b[i] - U(i,:)x )/d[i]; 2061 x[i] = (1-omega)x[i] + omega*sum[i]; 2062 */ 2063 for (i=0; i<m; i++) 2064 t[i] = b[i]; 2065 2066 for (i=0; i<m-1; i++){ /* update rhs */ 2067 v = aa + ai[i] + 1; 2068 vj = aj + ai[i] + 1; 2069 nz = ai[i+1] - ai[i] - 1; 2070 while (nz--) t[*vj++] -= x[i]*(*v++); 2071 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2072 } 2073 for (i=m-1; i>=0; i--){ 2074 d = *(aa + ai[i]); 2075 v = aa + ai[i] + 1; 2076 vj = aj + ai[i] + 1; 2077 nz = ai[i+1] - ai[i] - 1; 2078 sum = t[i]; 2079 while (nz--) sum -= x[*vj++]*(*v++); 2080 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2081 x[i] = (1-omega)*x[i] + omega*sum/d; 2082 } 2083 } 2084 } 2085 2086 ierr = PetscFree(t);CHKERRQ(ierr); 2087 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2088 if (bb != xx) { 2089 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2090 } 2091 PetscFunctionReturn(0); 2092 } 2093 2094 2095 2096 2097 2098 2099