1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the SBAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/inline/spops.h" 9 #include "src/mat/impls/sbaij/seq/sbaij.h" 10 11 #define CHUNKSIZE 10 12 13 /* 14 Checks for missing diagonals 15 */ 16 #undef __FUNCT__ 17 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 18 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A) 19 { 20 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 21 PetscErrorCode ierr; 22 PetscInt *diag,*jj = a->j,i; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 for (i=0; i<a->mbs; i++) { 28 if (jj[diag[i]] != i) SETERRQ1(PETSC_ERR_ARG_CORRUPT,"Matrix is missing diagonal number %D",i); 29 } 30 PetscFunctionReturn(0); 31 } 32 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 35 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A) 36 { 37 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 38 PetscErrorCode ierr; 39 PetscInt i,mbs = a->mbs; 40 41 PetscFunctionBegin; 42 if (a->diag) PetscFunctionReturn(0); 43 44 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 45 ierr = PetscLogObjectMemory(A,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 46 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 47 PetscFunctionReturn(0); 48 } 49 50 #undef __FUNCT__ 51 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 52 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 53 { 54 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 55 PetscInt n = a->mbs,i; 56 57 PetscFunctionBegin; 58 *nn = n; 59 if (!ia) PetscFunctionReturn(0); 60 61 if (oshift == 1) { 62 /* temporarily add 1 to i and j indices */ 63 PetscInt nz = a->i[n]; 64 for (i=0; i<nz; i++) a->j[i]++; 65 for (i=0; i<n+1; i++) a->i[i]++; 66 *ia = a->i; *ja = a->j; 67 } else { 68 *ia = a->i; *ja = a->j; 69 } 70 PetscFunctionReturn(0); 71 } 72 73 #undef __FUNCT__ 74 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 75 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 76 { 77 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 78 PetscInt i,n = a->mbs; 79 80 PetscFunctionBegin; 81 if (!ia) PetscFunctionReturn(0); 82 83 if (oshift == 1) { 84 PetscInt nz = a->i[n]-1; 85 for (i=0; i<nz; i++) a->j[i]--; 86 for (i=0; i<n+1; i++) a->i[i]--; 87 } 88 PetscFunctionReturn(0); 89 } 90 91 #undef __FUNCT__ 92 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 93 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A) 94 { 95 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 96 PetscErrorCode ierr; 97 98 PetscFunctionBegin; 99 PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->m,a->nz); 100 ierr = MatSeqXAIJFreeAIJ(a->singlemalloc,a->a,a->j,a->i);CHKERRQ(ierr); 101 if (a->row) { 102 ierr = ISDestroy(a->row);CHKERRQ(ierr); 103 } 104 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 105 if (a->imax) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 106 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 107 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 108 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 109 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 110 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 111 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 112 113 if (a->inew){ 114 ierr = PetscFree(a->inew);CHKERRQ(ierr); 115 a->inew = 0; 116 } 117 ierr = PetscFree(a);CHKERRQ(ierr); 118 119 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 120 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 121 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 122 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 123 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr); 124 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 125 PetscFunctionReturn(0); 126 } 127 128 #undef __FUNCT__ 129 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 130 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op) 131 { 132 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 133 PetscErrorCode ierr; 134 135 PetscFunctionBegin; 136 switch (op) { 137 case MAT_ROW_ORIENTED: 138 a->roworiented = PETSC_TRUE; 139 break; 140 case MAT_COLUMN_ORIENTED: 141 a->roworiented = PETSC_FALSE; 142 break; 143 case MAT_COLUMNS_SORTED: 144 a->sorted = PETSC_TRUE; 145 break; 146 case MAT_COLUMNS_UNSORTED: 147 a->sorted = PETSC_FALSE; 148 break; 149 case MAT_KEEP_ZEROED_ROWS: 150 a->keepzeroedrows = PETSC_TRUE; 151 break; 152 case MAT_NO_NEW_NONZERO_LOCATIONS: 153 a->nonew = 1; 154 break; 155 case MAT_NEW_NONZERO_LOCATION_ERR: 156 a->nonew = -1; 157 break; 158 case MAT_NEW_NONZERO_ALLOCATION_ERR: 159 a->nonew = -2; 160 break; 161 case MAT_YES_NEW_NONZERO_LOCATIONS: 162 a->nonew = 0; 163 break; 164 case MAT_ROWS_SORTED: 165 case MAT_ROWS_UNSORTED: 166 case MAT_YES_NEW_DIAGONALS: 167 case MAT_IGNORE_OFF_PROC_ENTRIES: 168 case MAT_USE_HASH_TABLE: 169 ierr = PetscLogInfo((A,"MatSetOption_SeqSBAIJ:Option ignored\n"));CHKERRQ(ierr); 170 break; 171 case MAT_NO_NEW_DIAGONALS: 172 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 173 case MAT_NOT_SYMMETRIC: 174 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 175 case MAT_HERMITIAN: 176 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 177 case MAT_SYMMETRIC: 178 case MAT_STRUCTURALLY_SYMMETRIC: 179 case MAT_NOT_HERMITIAN: 180 case MAT_SYMMETRY_ETERNAL: 181 case MAT_NOT_SYMMETRY_ETERNAL: 182 case MAT_IGNORE_LOWER_TRIANGULAR: 183 a->ignore_ltriangular = PETSC_TRUE; 184 break; 185 case MAT_ERROR_LOWER_TRIANGULAR: 186 a->ignore_ltriangular = PETSC_FALSE; 187 break; 188 default: 189 SETERRQ(PETSC_ERR_SUP,"unknown option"); 190 } 191 PetscFunctionReturn(0); 192 } 193 194 #undef __FUNCT__ 195 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 196 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v) 197 { 198 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 199 PetscErrorCode ierr; 200 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2; 201 MatScalar *aa,*aa_i; 202 PetscScalar *v_i; 203 204 PetscFunctionBegin; 205 bs = A->bs; 206 ai = a->i; 207 aj = a->j; 208 aa = a->a; 209 bs2 = a->bs2; 210 211 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row); 212 213 bn = row/bs; /* Block number */ 214 bp = row % bs; /* Block position */ 215 M = ai[bn+1] - ai[bn]; 216 *ncols = bs*M; 217 218 if (v) { 219 *v = 0; 220 if (*ncols) { 221 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 222 for (i=0; i<M; i++) { /* for each block in the block row */ 223 v_i = *v + i*bs; 224 aa_i = aa + bs2*(ai[bn] + i); 225 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 226 } 227 } 228 } 229 230 if (cols) { 231 *cols = 0; 232 if (*ncols) { 233 ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr); 234 for (i=0; i<M; i++) { /* for each block in the block row */ 235 cols_i = *cols + i*bs; 236 itmp = bs*aj[ai[bn] + i]; 237 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 238 } 239 } 240 } 241 242 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 243 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 244 #ifdef column_search 245 v_i = *v + M*bs; 246 cols_i = *cols + M*bs; 247 for (i=0; i<bn; i++){ /* for each block row */ 248 M = ai[i+1] - ai[i]; 249 for (j=0; j<M; j++){ 250 itmp = aj[ai[i] + j]; /* block column value */ 251 if (itmp == bn){ 252 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 253 for (k=0; k<bs; k++) { 254 *cols_i++ = i*bs+k; 255 *v_i++ = aa_i[k]; 256 } 257 *ncols += bs; 258 break; 259 } 260 } 261 } 262 #endif 263 PetscFunctionReturn(0); 264 } 265 266 #undef __FUNCT__ 267 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 268 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 269 { 270 PetscErrorCode ierr; 271 272 PetscFunctionBegin; 273 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 274 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 275 PetscFunctionReturn(0); 276 } 277 278 #undef __FUNCT__ 279 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 280 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B) 281 { 282 PetscErrorCode ierr; 283 PetscFunctionBegin; 284 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 285 PetscFunctionReturn(0); 286 } 287 288 #undef __FUNCT__ 289 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 290 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 291 { 292 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 293 PetscErrorCode ierr; 294 PetscInt i,j,bs = A->bs,k,l,bs2=a->bs2; 295 char *name; 296 PetscViewerFormat format; 297 298 PetscFunctionBegin; 299 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 300 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 301 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 302 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 303 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 304 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 305 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 306 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 307 for (i=0; i<a->mbs; i++) { 308 for (j=0; j<bs; j++) { 309 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 310 for (k=a->i[i]; k<a->i[i+1]; k++) { 311 for (l=0; l<bs; l++) { 312 #if defined(PETSC_USE_COMPLEX) 313 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 314 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 315 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 316 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 317 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 318 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 319 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 320 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 321 } 322 #else 323 if (a->a[bs2*k + l*bs + j] != 0.0) { 324 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 325 } 326 #endif 327 } 328 } 329 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 330 } 331 } 332 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 333 } else { 334 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 335 for (i=0; i<a->mbs; i++) { 336 for (j=0; j<bs; j++) { 337 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 338 for (k=a->i[i]; k<a->i[i+1]; k++) { 339 for (l=0; l<bs; l++) { 340 #if defined(PETSC_USE_COMPLEX) 341 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 342 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 343 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 344 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 345 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 346 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 347 } else { 348 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 349 } 350 #else 351 ierr = PetscViewerASCIIPrintf(viewer," %D %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 352 #endif 353 } 354 } 355 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 356 } 357 } 358 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 359 } 360 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 361 PetscFunctionReturn(0); 362 } 363 364 #undef __FUNCT__ 365 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 366 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 367 { 368 Mat A = (Mat) Aa; 369 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 370 PetscErrorCode ierr; 371 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2; 372 PetscMPIInt rank; 373 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 374 MatScalar *aa; 375 MPI_Comm comm; 376 PetscViewer viewer; 377 378 PetscFunctionBegin; 379 /* 380 This is nasty. If this is called from an originally parallel matrix 381 then all processes call this,but only the first has the matrix so the 382 rest should return immediately. 383 */ 384 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 385 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 386 if (rank) PetscFunctionReturn(0); 387 388 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 389 390 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 391 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 392 393 /* loop over matrix elements drawing boxes */ 394 color = PETSC_DRAW_BLUE; 395 for (i=0,row=0; i<mbs; i++,row+=bs) { 396 for (j=a->i[i]; j<a->i[i+1]; j++) { 397 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 398 x_l = a->j[j]*bs; x_r = x_l + 1.0; 399 aa = a->a + j*bs2; 400 for (k=0; k<bs; k++) { 401 for (l=0; l<bs; l++) { 402 if (PetscRealPart(*aa++) >= 0.) continue; 403 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 404 } 405 } 406 } 407 } 408 color = PETSC_DRAW_CYAN; 409 for (i=0,row=0; i<mbs; i++,row+=bs) { 410 for (j=a->i[i]; j<a->i[i+1]; j++) { 411 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 412 x_l = a->j[j]*bs; x_r = x_l + 1.0; 413 aa = a->a + j*bs2; 414 for (k=0; k<bs; k++) { 415 for (l=0; l<bs; l++) { 416 if (PetscRealPart(*aa++) != 0.) continue; 417 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 418 } 419 } 420 } 421 } 422 423 color = PETSC_DRAW_RED; 424 for (i=0,row=0; i<mbs; i++,row+=bs) { 425 for (j=a->i[i]; j<a->i[i+1]; j++) { 426 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 427 x_l = a->j[j]*bs; x_r = x_l + 1.0; 428 aa = a->a + j*bs2; 429 for (k=0; k<bs; k++) { 430 for (l=0; l<bs; l++) { 431 if (PetscRealPart(*aa++) <= 0.) continue; 432 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 433 } 434 } 435 } 436 } 437 PetscFunctionReturn(0); 438 } 439 440 #undef __FUNCT__ 441 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 442 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 443 { 444 PetscErrorCode ierr; 445 PetscReal xl,yl,xr,yr,w,h; 446 PetscDraw draw; 447 PetscTruth isnull; 448 449 PetscFunctionBegin; 450 451 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 452 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 453 454 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 455 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 456 xr += w; yr += h; xl = -w; yl = -h; 457 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 458 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 459 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 460 PetscFunctionReturn(0); 461 } 462 463 #undef __FUNCT__ 464 #define __FUNCT__ "MatView_SeqSBAIJ" 465 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 466 { 467 PetscErrorCode ierr; 468 PetscTruth iascii,isdraw; 469 470 PetscFunctionBegin; 471 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 472 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 473 if (iascii){ 474 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 475 } else if (isdraw) { 476 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 477 } else { 478 Mat B; 479 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 480 ierr = MatView(B,viewer);CHKERRQ(ierr); 481 ierr = MatDestroy(B);CHKERRQ(ierr); 482 } 483 PetscFunctionReturn(0); 484 } 485 486 487 #undef __FUNCT__ 488 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 489 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 490 { 491 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 492 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 493 PetscInt *ai = a->i,*ailen = a->ilen; 494 PetscInt brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2; 495 MatScalar *ap,*aa = a->a,zero = 0.0; 496 497 PetscFunctionBegin; 498 for (k=0; k<m; k++) { /* loop over rows */ 499 row = im[k]; brow = row/bs; 500 if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row); 501 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 502 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 503 nrow = ailen[brow]; 504 for (l=0; l<n; l++) { /* loop over columns */ 505 if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]); 506 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1); 507 col = in[l] ; 508 bcol = col/bs; 509 cidx = col%bs; 510 ridx = row%bs; 511 high = nrow; 512 low = 0; /* assume unsorted */ 513 while (high-low > 5) { 514 t = (low+high)/2; 515 if (rp[t] > bcol) high = t; 516 else low = t; 517 } 518 for (i=low; i<high; i++) { 519 if (rp[i] > bcol) break; 520 if (rp[i] == bcol) { 521 *v++ = ap[bs2*i+bs*cidx+ridx]; 522 goto finished; 523 } 524 } 525 *v++ = zero; 526 finished:; 527 } 528 } 529 PetscFunctionReturn(0); 530 } 531 532 533 #undef __FUNCT__ 534 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 535 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 536 { 537 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 538 PetscErrorCode ierr; 539 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 540 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 541 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval; 542 PetscTruth roworiented=a->roworiented; 543 const MatScalar *value = v; 544 MatScalar *ap,*aa = a->a,*bap; 545 546 PetscFunctionBegin; 547 if (roworiented) { 548 stepval = (n-1)*bs; 549 } else { 550 stepval = (m-1)*bs; 551 } 552 for (k=0; k<m; k++) { /* loop over added rows */ 553 row = im[k]; 554 if (row < 0) continue; 555 #if defined(PETSC_USE_DEBUG) 556 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 557 #endif 558 rp = aj + ai[row]; 559 ap = aa + bs2*ai[row]; 560 rmax = imax[row]; 561 nrow = ailen[row]; 562 low = 0; 563 high = nrow; 564 for (l=0; l<n; l++) { /* loop over added columns */ 565 if (in[l] < 0) continue; 566 col = in[l]; 567 #if defined(PETSC_USE_DEBUG) 568 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1); 569 #endif 570 if (col < row) continue; /* ignore lower triangular block */ 571 if (roworiented) { 572 value = v + k*(stepval+bs)*bs + l*bs; 573 } else { 574 value = v + l*(stepval+bs)*bs + k*bs; 575 } 576 if (col < lastcol) low = 0; else high = nrow; 577 lastcol = col; 578 while (high-low > 7) { 579 t = (low+high)/2; 580 if (rp[t] > col) high = t; 581 else low = t; 582 } 583 for (i=low; i<high; i++) { 584 if (rp[i] > col) break; 585 if (rp[i] == col) { 586 bap = ap + bs2*i; 587 if (roworiented) { 588 if (is == ADD_VALUES) { 589 for (ii=0; ii<bs; ii++,value+=stepval) { 590 for (jj=ii; jj<bs2; jj+=bs) { 591 bap[jj] += *value++; 592 } 593 } 594 } else { 595 for (ii=0; ii<bs; ii++,value+=stepval) { 596 for (jj=ii; jj<bs2; jj+=bs) { 597 bap[jj] = *value++; 598 } 599 } 600 } 601 } else { 602 if (is == ADD_VALUES) { 603 for (ii=0; ii<bs; ii++,value+=stepval) { 604 for (jj=0; jj<bs; jj++) { 605 *bap++ += *value++; 606 } 607 } 608 } else { 609 for (ii=0; ii<bs; ii++,value+=stepval) { 610 for (jj=0; jj<bs; jj++) { 611 *bap++ = *value++; 612 } 613 } 614 } 615 } 616 goto noinsert2; 617 } 618 } 619 if (nonew == 1) goto noinsert2; 620 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 621 if (nrow >= rmax) { 622 /* there is no extra room in row, therefore enlarge */ 623 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 624 MatScalar *new_a; 625 626 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 627 628 /* malloc new storage space */ 629 ierr = PetscMalloc3(bs2*new_nz,PetscScalar,&new_a,new_nz,PetscInt,&new_j,a->mbs+1,PetscInt,&new_i);CHKERRQ(ierr); 630 631 /* copy over old data into new slots */ 632 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 633 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 634 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 635 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 636 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 637 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 638 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 639 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 640 /* free up old matrix storage */ 641 ierr = MatSeqXAIJFreeAIJ(a->singlemalloc,a->a,a->j,a->i);CHKERRQ(ierr); 642 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 643 a->singlemalloc = PETSC_TRUE; 644 645 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 646 rmax = imax[row] = imax[row] + CHUNKSIZE; 647 ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); 648 a->maxnz += bs2*CHUNKSIZE; 649 a->reallocs++; 650 a->nz++; 651 } 652 N = nrow++ - 1; 653 /* shift up all the later entries in this row */ 654 for (ii=N; ii>=i; ii--) { 655 rp[ii+1] = rp[ii]; 656 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 657 } 658 if (N >= i) { 659 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 660 } 661 rp[i] = col; 662 bap = ap + bs2*i; 663 if (roworiented) { 664 for (ii=0; ii<bs; ii++,value+=stepval) { 665 for (jj=ii; jj<bs2; jj+=bs) { 666 bap[jj] = *value++; 667 } 668 } 669 } else { 670 for (ii=0; ii<bs; ii++,value+=stepval) { 671 for (jj=0; jj<bs; jj++) { 672 *bap++ = *value++; 673 } 674 } 675 } 676 noinsert2:; 677 low = i; 678 } 679 ailen[row] = nrow; 680 } 681 PetscFunctionReturn(0); 682 } 683 684 #undef __FUNCT__ 685 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 686 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 687 { 688 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 689 PetscErrorCode ierr; 690 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 691 PetscInt m = A->m,*ip,N,*ailen = a->ilen; 692 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 693 MatScalar *aa = a->a,*ap; 694 695 PetscFunctionBegin; 696 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 697 698 if (m) rmax = ailen[0]; 699 for (i=1; i<mbs; i++) { 700 /* move each row back by the amount of empty slots (fshift) before it*/ 701 fshift += imax[i-1] - ailen[i-1]; 702 rmax = PetscMax(rmax,ailen[i]); 703 if (fshift) { 704 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 705 N = ailen[i]; 706 for (j=0; j<N; j++) { 707 ip[j-fshift] = ip[j]; 708 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 709 } 710 } 711 ai[i] = ai[i-1] + ailen[i-1]; 712 } 713 if (mbs) { 714 fshift += imax[mbs-1] - ailen[mbs-1]; 715 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 716 } 717 /* reset ilen and imax for each row */ 718 for (i=0; i<mbs; i++) { 719 ailen[i] = imax[i] = ai[i+1] - ai[i]; 720 } 721 a->nz = ai[mbs]; 722 723 /* diagonals may have moved, reset it */ 724 if (a->diag) { 725 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 726 } 727 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n", 728 m,A->m,A->bs,fshift*bs2,a->nz*bs2));CHKERRQ(ierr); 729 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs));CHKERRQ(ierr); 730 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %D\n",rmax));CHKERRQ(ierr); 731 a->reallocs = 0; 732 A->info.nz_unneeded = (PetscReal)fshift*bs2; 733 PetscFunctionReturn(0); 734 } 735 736 /* 737 This function returns an array of flags which indicate the locations of contiguous 738 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 739 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 740 Assume: sizes should be long enough to hold all the values. 741 */ 742 #undef __FUNCT__ 743 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 744 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 745 { 746 PetscInt i,j,k,row; 747 PetscTruth flg; 748 749 PetscFunctionBegin; 750 for (i=0,j=0; i<n; j++) { 751 row = idx[i]; 752 if (row%bs!=0) { /* Not the begining of a block */ 753 sizes[j] = 1; 754 i++; 755 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 756 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 757 i++; 758 } else { /* Begining of the block, so check if the complete block exists */ 759 flg = PETSC_TRUE; 760 for (k=1; k<bs; k++) { 761 if (row+k != idx[i+k]) { /* break in the block */ 762 flg = PETSC_FALSE; 763 break; 764 } 765 } 766 if (flg) { /* No break in the bs */ 767 sizes[j] = bs; 768 i+= bs; 769 } else { 770 sizes[j] = 1; 771 i++; 772 } 773 } 774 } 775 *bs_max = j; 776 PetscFunctionReturn(0); 777 } 778 779 780 /* Only add/insert a(i,j) with i<=j (blocks). 781 Any a(i,j) with i>j input by user is ingored. 782 */ 783 784 #undef __FUNCT__ 785 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 786 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 787 { 788 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 789 PetscErrorCode ierr; 790 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 791 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 792 PetscInt *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol; 793 PetscInt ridx,cidx,bs2=a->bs2; 794 MatScalar *ap,value,*aa=a->a,*bap; 795 796 PetscFunctionBegin; 797 for (k=0; k<m; k++) { /* loop over added rows */ 798 row = im[k]; /* row number */ 799 brow = row/bs; /* block row number */ 800 if (row < 0) continue; 801 #if defined(PETSC_USE_DEBUG) 802 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 803 #endif 804 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 805 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 806 rmax = imax[brow]; /* maximum space allocated for this row */ 807 nrow = ailen[brow]; /* actual length of this row */ 808 low = 0; 809 810 for (l=0; l<n; l++) { /* loop over added columns */ 811 if (in[l] < 0) continue; 812 #if defined(PETSC_USE_DEBUG) 813 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->m-1); 814 #endif 815 col = in[l]; 816 bcol = col/bs; /* block col number */ 817 818 if (brow > bcol) { 819 if (a->ignore_ltriangular){ 820 continue; /* ignore lower triangular values */ 821 } else { 822 SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR)"); 823 } 824 } 825 826 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 827 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 828 /* element value a(k,l) */ 829 if (roworiented) { 830 value = v[l + k*n]; 831 } else { 832 value = v[k + l*m]; 833 } 834 835 /* move pointer bap to a(k,l) quickly and add/insert value */ 836 if (col < lastcol) low = 0; high = nrow; 837 lastcol = col; 838 while (high-low > 7) { 839 t = (low+high)/2; 840 if (rp[t] > bcol) high = t; 841 else low = t; 842 } 843 for (i=low; i<high; i++) { 844 if (rp[i] > bcol) break; 845 if (rp[i] == bcol) { 846 bap = ap + bs2*i + bs*cidx + ridx; 847 if (is == ADD_VALUES) *bap += value; 848 else *bap = value; 849 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 850 if (brow == bcol && ridx < cidx){ 851 bap = ap + bs2*i + bs*ridx + cidx; 852 if (is == ADD_VALUES) *bap += value; 853 else *bap = value; 854 } 855 goto noinsert1; 856 } 857 } 858 859 if (nonew == 1) goto noinsert1; 860 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 861 if (nrow >= rmax) { 862 /* there is no extra room in row, therefore enlarge */ 863 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 864 MatScalar *new_a; 865 866 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 867 868 /* Malloc new storage space */ 869 ierr = PetscMalloc3(bs2*new_nz,PetscScalar,&new_a,new_nz,PetscInt,&new_j,a->mbs+1,PetscInt,&new_i);CHKERRQ(ierr); 870 871 /* copy over old data into new slots */ 872 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 873 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 874 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 875 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 876 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 877 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 878 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 879 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 880 /* free up old matrix storage */ 881 ierr = MatSeqXAIJFreeAIJ(a->singlemalloc,a->a,a->j,a->i);CHKERRQ(ierr); 882 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 883 a->singlemalloc = PETSC_TRUE; 884 885 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 886 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 887 ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); 888 a->maxnz += bs2*CHUNKSIZE; 889 a->reallocs++; 890 a->nz++; 891 } 892 893 N = nrow++ - 1; 894 /* shift up all the later entries in this row */ 895 for (ii=N; ii>=i; ii--) { 896 rp[ii+1] = rp[ii]; 897 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 898 } 899 if (N>=i) { 900 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 901 } 902 rp[i] = bcol; 903 ap[bs2*i + bs*cidx + ridx] = value; 904 noinsert1:; 905 low = i; 906 } 907 } /* end of loop over added columns */ 908 ailen[brow] = nrow; 909 } /* end of loop over added rows */ 910 PetscFunctionReturn(0); 911 } 912 913 EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*); 914 EXTERN PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*); 915 EXTERN PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat,PetscInt,IS[],PetscInt); 916 EXTERN PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,PetscInt,MatReuse,Mat*); 917 EXTERN PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat*[]); 918 EXTERN PetscErrorCode MatScale_SeqSBAIJ(const PetscScalar*,Mat); 919 EXTERN PetscErrorCode MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 920 EXTERN PetscErrorCode MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 921 EXTERN PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat,Vec); 922 EXTERN PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 923 EXTERN PetscErrorCode MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 924 EXTERN PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat); 925 EXTERN PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat,Vec); 926 927 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 928 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 929 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 930 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 931 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 932 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 933 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 934 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 935 936 EXTERN PetscErrorCode MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs); 937 938 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 939 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 940 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 941 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 942 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 943 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 944 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 945 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 946 947 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,MatFactorInfo*,Mat*); 948 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,MatFactorInfo*,Mat*); 949 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,MatFactorInfo*,Mat*); 950 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,MatFactorInfo*,Mat*); 951 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,MatFactorInfo*,Mat*); 952 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,MatFactorInfo*,Mat*); 953 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,MatFactorInfo*,Mat*); 954 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,MatFactorInfo*,Mat*); 955 EXTERN PetscErrorCode MatGetInertia_SeqSBAIJ(Mat,PetscInt*,PetscInt*,PetscInt*); 956 957 EXTERN PetscErrorCode MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 958 EXTERN PetscErrorCode MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 959 EXTERN PetscErrorCode MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 960 EXTERN PetscErrorCode MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 961 EXTERN PetscErrorCode MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 962 EXTERN PetscErrorCode MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 963 EXTERN PetscErrorCode MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 964 EXTERN PetscErrorCode MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 965 966 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 967 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 968 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 969 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 970 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 971 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 972 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 973 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 974 975 #ifdef HAVE_MatICCFactor 976 /* modified from MatILUFactor_SeqSBAIJ, needs further work! */ 977 #undef __FUNCT__ 978 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 979 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 980 { 981 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 982 Mat outA; 983 PetscErrorCode ierr; 984 PetscTruth row_identity,col_identity; 985 986 PetscFunctionBegin; 987 outA = inA; 988 inA->factor = FACTOR_CHOLESKY; 989 990 if (!a->diag) { 991 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 992 } 993 /* 994 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 995 for ILU(0) factorization with natural ordering 996 */ 997 switch (a->bs) { 998 case 1: 999 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1000 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1001 inA->ops->solves = MatSolves_SeqSBAIJ_1; 1002 ierr = PetscLoginfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"));CHKERRQ(ierr); 1003 case 2: 1004 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1005 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1006 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1007 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"));CHKERRQ(ierr); 1008 break; 1009 case 3: 1010 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1011 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1012 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1013 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"));CHKERRQ(ierr); 1014 break; 1015 case 4: 1016 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1017 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1018 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1019 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"));CHKERRQ(ierr); 1020 break; 1021 case 5: 1022 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1023 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1024 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1025 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"));CHKERRQ(ierr); 1026 break; 1027 case 6: 1028 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1029 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1030 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1031 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"));CHKERRQ(ierr); 1032 break; 1033 case 7: 1034 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1035 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1036 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1037 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"));CHKERRQ(ierr); 1038 break; 1039 default: 1040 a->row = row; 1041 a->icol = col; 1042 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1043 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1044 1045 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1046 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1047 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 1048 1049 if (!a->solve_work) { 1050 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1051 ierr = PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1052 } 1053 } 1054 1055 ierr = MatCholeskyFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1056 PetscFunctionReturn(0); 1057 } 1058 #endif 1059 1060 #undef __FUNCT__ 1061 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1062 PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat A) 1063 { 1064 static PetscTruth called = PETSC_FALSE; 1065 MPI_Comm comm = A->comm; 1066 PetscErrorCode ierr; 1067 1068 PetscFunctionBegin; 1069 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1070 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1071 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1072 ierr = (*PetscHelpPrintf)(comm," -mat_ignore_ltriangular: Ignore lower triangular values set by user\n");CHKERRQ(ierr); 1073 PetscFunctionReturn(0); 1074 } 1075 1076 EXTERN_C_BEGIN 1077 #undef __FUNCT__ 1078 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1079 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices) 1080 { 1081 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1082 PetscInt i,nz,n; 1083 1084 PetscFunctionBegin; 1085 nz = baij->maxnz; 1086 n = mat->n; 1087 for (i=0; i<nz; i++) { 1088 baij->j[i] = indices[i]; 1089 } 1090 baij->nz = nz; 1091 for (i=0; i<n; i++) { 1092 baij->ilen[i] = baij->imax[i]; 1093 } 1094 PetscFunctionReturn(0); 1095 } 1096 EXTERN_C_END 1097 1098 #undef __FUNCT__ 1099 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1100 /*@ 1101 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1102 in the matrix. 1103 1104 Input Parameters: 1105 + mat - the SeqSBAIJ matrix 1106 - indices - the column indices 1107 1108 Level: advanced 1109 1110 Notes: 1111 This can be called if you have precomputed the nonzero structure of the 1112 matrix and want to provide it to the matrix object to improve the performance 1113 of the MatSetValues() operation. 1114 1115 You MUST have set the correct numbers of nonzeros per row in the call to 1116 MatCreateSeqSBAIJ(), and the columns indices MUST be sorted. 1117 1118 MUST be called before any calls to MatSetValues() 1119 1120 .seealso: MatCreateSeqSBAIJ 1121 @*/ 1122 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1123 { 1124 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1125 1126 PetscFunctionBegin; 1127 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1128 PetscValidPointer(indices,2); 1129 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1130 if (f) { 1131 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1132 } else { 1133 SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices"); 1134 } 1135 PetscFunctionReturn(0); 1136 } 1137 1138 #undef __FUNCT__ 1139 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1140 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A) 1141 { 1142 PetscErrorCode ierr; 1143 1144 PetscFunctionBegin; 1145 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1146 PetscFunctionReturn(0); 1147 } 1148 1149 #undef __FUNCT__ 1150 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1151 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1152 { 1153 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1154 PetscFunctionBegin; 1155 *array = a->a; 1156 PetscFunctionReturn(0); 1157 } 1158 1159 #undef __FUNCT__ 1160 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1161 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1162 { 1163 PetscFunctionBegin; 1164 PetscFunctionReturn(0); 1165 } 1166 1167 #include "petscblaslapack.h" 1168 #undef __FUNCT__ 1169 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1170 PetscErrorCode MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1171 { 1172 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1173 PetscErrorCode ierr; 1174 PetscInt i,bs=Y->bs,bs2,j; 1175 PetscBLASInt bnz = (PetscBLASInt)x->nz,one = 1; 1176 1177 PetscFunctionBegin; 1178 if (str == SAME_NONZERO_PATTERN) { 1179 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1180 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1181 if (y->xtoy && y->XtoY != X) { 1182 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1183 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1184 } 1185 if (!y->xtoy) { /* get xtoy */ 1186 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1187 y->XtoY = X; 1188 } 1189 bs2 = bs*bs; 1190 for (i=0; i<x->nz; i++) { 1191 j = 0; 1192 while (j < bs2){ 1193 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1194 j++; 1195 } 1196 } 1197 ierr = PetscLogInfo((0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr); 1198 } else { 1199 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1200 } 1201 PetscFunctionReturn(0); 1202 } 1203 1204 #undef __FUNCT__ 1205 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1206 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1207 { 1208 PetscFunctionBegin; 1209 *flg = PETSC_TRUE; 1210 PetscFunctionReturn(0); 1211 } 1212 1213 #undef __FUNCT__ 1214 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1215 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1216 { 1217 PetscFunctionBegin; 1218 *flg = PETSC_TRUE; 1219 PetscFunctionReturn(0); 1220 } 1221 1222 #undef __FUNCT__ 1223 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1224 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg) 1225 { 1226 PetscFunctionBegin; 1227 *flg = PETSC_FALSE; 1228 PetscFunctionReturn(0); 1229 } 1230 1231 /* -------------------------------------------------------------------*/ 1232 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1233 MatGetRow_SeqSBAIJ, 1234 MatRestoreRow_SeqSBAIJ, 1235 MatMult_SeqSBAIJ_N, 1236 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1237 MatMult_SeqSBAIJ_N, 1238 MatMultAdd_SeqSBAIJ_N, 1239 MatSolve_SeqSBAIJ_N, 1240 0, 1241 0, 1242 /*10*/ 0, 1243 0, 1244 MatCholeskyFactor_SeqSBAIJ, 1245 MatRelax_SeqSBAIJ, 1246 MatTranspose_SeqSBAIJ, 1247 /*15*/ MatGetInfo_SeqSBAIJ, 1248 MatEqual_SeqSBAIJ, 1249 MatGetDiagonal_SeqSBAIJ, 1250 MatDiagonalScale_SeqSBAIJ, 1251 MatNorm_SeqSBAIJ, 1252 /*20*/ 0, 1253 MatAssemblyEnd_SeqSBAIJ, 1254 0, 1255 MatSetOption_SeqSBAIJ, 1256 MatZeroEntries_SeqSBAIJ, 1257 /*25*/ 0, 1258 0, 1259 0, 1260 MatCholeskyFactorSymbolic_SeqSBAIJ, 1261 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1262 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1263 0, 1264 MatICCFactorSymbolic_SeqSBAIJ, 1265 MatGetArray_SeqSBAIJ, 1266 MatRestoreArray_SeqSBAIJ, 1267 /*35*/ MatDuplicate_SeqSBAIJ, 1268 0, 1269 0, 1270 0, 1271 0, 1272 /*40*/ MatAXPY_SeqSBAIJ, 1273 MatGetSubMatrices_SeqSBAIJ, 1274 MatIncreaseOverlap_SeqSBAIJ, 1275 MatGetValues_SeqSBAIJ, 1276 0, 1277 /*45*/ MatPrintHelp_SeqSBAIJ, 1278 MatScale_SeqSBAIJ, 1279 0, 1280 0, 1281 0, 1282 /*50*/ 0, 1283 MatGetRowIJ_SeqSBAIJ, 1284 MatRestoreRowIJ_SeqSBAIJ, 1285 0, 1286 0, 1287 /*55*/ 0, 1288 0, 1289 0, 1290 0, 1291 MatSetValuesBlocked_SeqSBAIJ, 1292 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1293 0, 1294 0, 1295 MatGetPetscMaps_Petsc, 1296 0, 1297 /*65*/ 0, 1298 0, 1299 0, 1300 0, 1301 0, 1302 /*70*/ MatGetRowMax_SeqSBAIJ, 1303 0, 1304 0, 1305 0, 1306 0, 1307 /*75*/ 0, 1308 0, 1309 0, 1310 0, 1311 0, 1312 /*80*/ 0, 1313 0, 1314 0, 1315 #if !defined(PETSC_USE_COMPLEX) 1316 MatGetInertia_SeqSBAIJ, 1317 #else 1318 0, 1319 #endif 1320 MatLoad_SeqSBAIJ, 1321 /*85*/ MatIsSymmetric_SeqSBAIJ, 1322 MatIsHermitian_SeqSBAIJ, 1323 MatIsStructurallySymmetric_SeqSBAIJ, 1324 0, 1325 0, 1326 /*90*/ 0, 1327 0, 1328 0, 1329 0, 1330 0, 1331 /*95*/ 0, 1332 0, 1333 0, 1334 0}; 1335 1336 EXTERN_C_BEGIN 1337 #undef __FUNCT__ 1338 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1339 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqSBAIJ(Mat mat) 1340 { 1341 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1342 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 1343 PetscErrorCode ierr; 1344 1345 PetscFunctionBegin; 1346 if (aij->nonew != 1) { 1347 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1348 } 1349 1350 /* allocate space for values if not already there */ 1351 if (!aij->saved_values) { 1352 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1353 } 1354 1355 /* copy values over */ 1356 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1357 PetscFunctionReturn(0); 1358 } 1359 EXTERN_C_END 1360 1361 EXTERN_C_BEGIN 1362 #undef __FUNCT__ 1363 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1364 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqSBAIJ(Mat mat) 1365 { 1366 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1367 PetscErrorCode ierr; 1368 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 1369 1370 PetscFunctionBegin; 1371 if (aij->nonew != 1) { 1372 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1373 } 1374 if (!aij->saved_values) { 1375 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1376 } 1377 1378 /* copy values over */ 1379 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1380 PetscFunctionReturn(0); 1381 } 1382 EXTERN_C_END 1383 1384 EXTERN_C_BEGIN 1385 #undef __FUNCT__ 1386 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1387 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 1388 { 1389 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1390 PetscErrorCode ierr; 1391 PetscInt i,mbs,bs2; 1392 PetscTruth skipallocation = PETSC_FALSE,flg; 1393 1394 PetscFunctionBegin; 1395 B->preallocated = PETSC_TRUE; 1396 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1397 mbs = B->m/bs; 1398 bs2 = bs*bs; 1399 1400 if (mbs*bs != B->m) { 1401 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1402 } 1403 1404 if (nz == MAT_SKIP_ALLOCATION) { 1405 skipallocation = PETSC_TRUE; 1406 nz = 0; 1407 } 1408 1409 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1410 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 1411 if (nnz) { 1412 for (i=0; i<mbs; i++) { 1413 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 1414 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs); 1415 } 1416 } 1417 1418 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1419 if (!flg) { 1420 switch (bs) { 1421 case 1: 1422 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1423 B->ops->solve = MatSolve_SeqSBAIJ_1; 1424 B->ops->solves = MatSolves_SeqSBAIJ_1; 1425 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1426 B->ops->mult = MatMult_SeqSBAIJ_1; 1427 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1428 break; 1429 case 2: 1430 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1431 B->ops->solve = MatSolve_SeqSBAIJ_2; 1432 B->ops->solvetranspose = MatSolve_SeqSBAIJ_2; 1433 B->ops->mult = MatMult_SeqSBAIJ_2; 1434 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1435 break; 1436 case 3: 1437 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1438 B->ops->solve = MatSolve_SeqSBAIJ_3; 1439 B->ops->solvetranspose = MatSolve_SeqSBAIJ_3; 1440 B->ops->mult = MatMult_SeqSBAIJ_3; 1441 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1442 break; 1443 case 4: 1444 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1445 B->ops->solve = MatSolve_SeqSBAIJ_4; 1446 B->ops->solvetranspose = MatSolve_SeqSBAIJ_4; 1447 B->ops->mult = MatMult_SeqSBAIJ_4; 1448 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1449 break; 1450 case 5: 1451 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1452 B->ops->solve = MatSolve_SeqSBAIJ_5; 1453 B->ops->solvetranspose = MatSolve_SeqSBAIJ_5; 1454 B->ops->mult = MatMult_SeqSBAIJ_5; 1455 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1456 break; 1457 case 6: 1458 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1459 B->ops->solve = MatSolve_SeqSBAIJ_6; 1460 B->ops->solvetranspose = MatSolve_SeqSBAIJ_6; 1461 B->ops->mult = MatMult_SeqSBAIJ_6; 1462 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1463 break; 1464 case 7: 1465 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1466 B->ops->solve = MatSolve_SeqSBAIJ_7; 1467 B->ops->solvetranspose = MatSolve_SeqSBAIJ_7; 1468 B->ops->mult = MatMult_SeqSBAIJ_7; 1469 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1470 break; 1471 } 1472 } 1473 1474 b->mbs = mbs; 1475 b->nbs = mbs; 1476 if (!skipallocation) { 1477 /* b->ilen will count nonzeros in each block row so far. */ 1478 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 1479 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1480 if (!nnz) { 1481 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1482 else if (nz <= 0) nz = 1; 1483 for (i=0; i<mbs; i++) { 1484 b->imax[i] = nz; 1485 } 1486 nz = nz*mbs; /* total nz */ 1487 } else { 1488 nz = 0; 1489 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1490 } 1491 /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1492 1493 /* allocate the matrix space */ 1494 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->m+1,PetscInt,&b->i);CHKERRQ(ierr); 1495 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1496 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1497 b->singlemalloc = PETSC_TRUE; 1498 1499 /* pointer to beginning of each row */ 1500 b->i[0] = 0; 1501 for (i=1; i<mbs+1; i++) { 1502 b->i[i] = b->i[i-1] + b->imax[i-1]; 1503 } 1504 } 1505 1506 B->bs = bs; 1507 b->bs2 = bs2; 1508 b->nz = 0; 1509 b->maxnz = nz*bs2; 1510 1511 b->inew = 0; 1512 b->jnew = 0; 1513 b->anew = 0; 1514 b->a2anew = 0; 1515 b->permute = PETSC_FALSE; 1516 PetscFunctionReturn(0); 1517 } 1518 EXTERN_C_END 1519 1520 EXTERN_C_BEGIN 1521 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*); 1522 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqBAIJ(Mat,const MatType,MatReuse,Mat*); 1523 EXTERN_C_END 1524 1525 /*MC 1526 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1527 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1528 1529 Options Database Keys: 1530 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1531 1532 Level: beginner 1533 1534 .seealso: MatCreateSeqSBAIJ 1535 M*/ 1536 1537 EXTERN_C_BEGIN 1538 #undef __FUNCT__ 1539 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1540 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqSBAIJ(Mat B) 1541 { 1542 Mat_SeqSBAIJ *b; 1543 PetscErrorCode ierr; 1544 PetscMPIInt size; 1545 PetscTruth flg; 1546 1547 PetscFunctionBegin; 1548 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1549 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1550 B->m = B->M = PetscMax(B->m,B->M); 1551 B->n = B->N = PetscMax(B->n,B->N); 1552 1553 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1554 B->data = (void*)b; 1555 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1556 B->ops->destroy = MatDestroy_SeqSBAIJ; 1557 B->ops->view = MatView_SeqSBAIJ; 1558 B->factor = 0; 1559 B->mapping = 0; 1560 b->row = 0; 1561 b->icol = 0; 1562 b->reallocs = 0; 1563 b->saved_values = 0; 1564 1565 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1566 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1567 1568 b->sorted = PETSC_FALSE; 1569 b->roworiented = PETSC_TRUE; 1570 b->nonew = 0; 1571 b->diag = 0; 1572 b->solve_work = 0; 1573 b->mult_work = 0; 1574 B->spptr = 0; 1575 b->keepzeroedrows = PETSC_FALSE; 1576 b->xtoy = 0; 1577 b->XtoY = 0; 1578 1579 b->inew = 0; 1580 b->jnew = 0; 1581 b->anew = 0; 1582 b->a2anew = 0; 1583 b->permute = PETSC_FALSE; 1584 1585 b->ignore_ltriangular = PETSC_FALSE; 1586 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_ignore_lower_triangular",&flg);CHKERRQ(ierr); 1587 if (flg) b->ignore_ltriangular = PETSC_TRUE; 1588 1589 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1590 "MatStoreValues_SeqSBAIJ", 1591 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1592 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1593 "MatRetrieveValues_SeqSBAIJ", 1594 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1595 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1596 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1597 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1598 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C", 1599 "MatConvert_SeqSBAIJ_SeqAIJ", 1600 MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr); 1601 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C", 1602 "MatConvert_SeqSBAIJ_SeqBAIJ", 1603 MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr); 1604 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1605 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1606 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1607 1608 B->symmetric = PETSC_TRUE; 1609 B->structurally_symmetric = PETSC_TRUE; 1610 B->symmetric_set = PETSC_TRUE; 1611 B->structurally_symmetric_set = PETSC_TRUE; 1612 PetscFunctionReturn(0); 1613 } 1614 EXTERN_C_END 1615 1616 #undef __FUNCT__ 1617 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1618 /*@C 1619 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1620 compressed row) format. For good matrix assembly performance the 1621 user should preallocate the matrix storage by setting the parameter nz 1622 (or the array nnz). By setting these parameters accurately, performance 1623 during matrix assembly can be increased by more than a factor of 50. 1624 1625 Collective on Mat 1626 1627 Input Parameters: 1628 + A - the symmetric matrix 1629 . bs - size of block 1630 . nz - number of block nonzeros per block row (same for all rows) 1631 - nnz - array containing the number of block nonzeros in the upper triangular plus 1632 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1633 1634 Options Database Keys: 1635 . -mat_no_unroll - uses code that does not unroll the loops in the 1636 block calculations (much slower) 1637 . -mat_block_size - size of the blocks to use 1638 1639 Level: intermediate 1640 1641 Notes: 1642 Specify the preallocated storage with either nz or nnz (not both). 1643 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1644 allocation. For additional details, see the users manual chapter on 1645 matrices. 1646 1647 If the nnz parameter is given then the nz parameter is ignored 1648 1649 1650 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1651 @*/ 1652 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 1653 { 1654 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 1655 1656 PetscFunctionBegin; 1657 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1658 if (f) { 1659 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1660 } 1661 PetscFunctionReturn(0); 1662 } 1663 1664 #undef __FUNCT__ 1665 #define __FUNCT__ "MatCreateSeqSBAIJ" 1666 /*@C 1667 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1668 compressed row) format. For good matrix assembly performance the 1669 user should preallocate the matrix storage by setting the parameter nz 1670 (or the array nnz). By setting these parameters accurately, performance 1671 during matrix assembly can be increased by more than a factor of 50. 1672 1673 Collective on MPI_Comm 1674 1675 Input Parameters: 1676 + comm - MPI communicator, set to PETSC_COMM_SELF 1677 . bs - size of block 1678 . m - number of rows, or number of columns 1679 . nz - number of block nonzeros per block row (same for all rows) 1680 - nnz - array containing the number of block nonzeros in the upper triangular plus 1681 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1682 1683 Output Parameter: 1684 . A - the symmetric matrix 1685 1686 Options Database Keys: 1687 . -mat_no_unroll - uses code that does not unroll the loops in the 1688 block calculations (much slower) 1689 . -mat_block_size - size of the blocks to use 1690 1691 Level: intermediate 1692 1693 Notes: 1694 1695 Specify the preallocated storage with either nz or nnz (not both). 1696 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1697 allocation. For additional details, see the users manual chapter on 1698 matrices. 1699 1700 If the nnz parameter is given then the nz parameter is ignored 1701 1702 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1703 @*/ 1704 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1705 { 1706 PetscErrorCode ierr; 1707 1708 PetscFunctionBegin; 1709 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1710 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1711 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 1712 PetscFunctionReturn(0); 1713 } 1714 1715 #undef __FUNCT__ 1716 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1717 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1718 { 1719 Mat C; 1720 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1721 PetscErrorCode ierr; 1722 PetscInt i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1723 1724 PetscFunctionBegin; 1725 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1726 1727 *B = 0; 1728 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1729 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1730 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1731 c = (Mat_SeqSBAIJ*)C->data; 1732 1733 C->preallocated = PETSC_TRUE; 1734 C->factor = A->factor; 1735 c->row = 0; 1736 c->icol = 0; 1737 c->saved_values = 0; 1738 c->keepzeroedrows = a->keepzeroedrows; 1739 C->assembled = PETSC_TRUE; 1740 1741 C->M = A->M; 1742 C->N = A->N; 1743 C->bs = A->bs; 1744 c->bs2 = a->bs2; 1745 c->mbs = a->mbs; 1746 c->nbs = a->nbs; 1747 1748 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr); 1749 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr); 1750 for (i=0; i<mbs; i++) { 1751 c->imax[i] = a->imax[i]; 1752 c->ilen[i] = a->ilen[i]; 1753 } 1754 1755 /* allocate the matrix space */ 1756 ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 1757 c->singlemalloc = PETSC_TRUE; 1758 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1759 if (mbs > 0) { 1760 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1761 if (cpvalues == MAT_COPY_VALUES) { 1762 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1763 } else { 1764 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1765 } 1766 } 1767 1768 ierr = PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1769 c->sorted = a->sorted; 1770 c->roworiented = a->roworiented; 1771 c->nonew = a->nonew; 1772 1773 if (a->diag) { 1774 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 1775 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1776 for (i=0; i<mbs; i++) { 1777 c->diag[i] = a->diag[i]; 1778 } 1779 } else c->diag = 0; 1780 c->nz = a->nz; 1781 c->maxnz = a->maxnz; 1782 c->solve_work = 0; 1783 c->mult_work = 0; 1784 *B = C; 1785 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1786 PetscFunctionReturn(0); 1787 } 1788 1789 #undef __FUNCT__ 1790 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1791 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer,const MatType type,Mat *A) 1792 { 1793 Mat_SeqSBAIJ *a; 1794 Mat B; 1795 PetscErrorCode ierr; 1796 int fd; 1797 PetscMPIInt size; 1798 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 1799 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1800 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 1801 PetscInt *masked,nmask,tmp,bs2,ishift; 1802 PetscScalar *aa; 1803 MPI_Comm comm = ((PetscObject)viewer)->comm; 1804 1805 PetscFunctionBegin; 1806 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1807 bs2 = bs*bs; 1808 1809 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1810 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1811 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1812 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1813 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1814 M = header[1]; N = header[2]; nz = header[3]; 1815 1816 if (header[3] < 0) { 1817 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1818 } 1819 1820 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1821 1822 /* 1823 This code adds extra rows to make sure the number of rows is 1824 divisible by the blocksize 1825 */ 1826 mbs = M/bs; 1827 extra_rows = bs - M + bs*(mbs); 1828 if (extra_rows == bs) extra_rows = 0; 1829 else mbs++; 1830 if (extra_rows) { 1831 ierr = PetscLogInfo((0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr); 1832 } 1833 1834 /* read in row lengths */ 1835 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1836 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1837 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1838 1839 /* read in column indices */ 1840 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1841 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1842 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1843 1844 /* loop over row lengths determining block row lengths */ 1845 ierr = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr); 1846 ierr = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1847 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 1848 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1849 masked = mask + mbs; 1850 rowcount = 0; nzcount = 0; 1851 for (i=0; i<mbs; i++) { 1852 nmask = 0; 1853 for (j=0; j<bs; j++) { 1854 kmax = rowlengths[rowcount]; 1855 for (k=0; k<kmax; k++) { 1856 tmp = jj[nzcount++]/bs; /* block col. index */ 1857 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1858 } 1859 rowcount++; 1860 } 1861 s_browlengths[i] += nmask; 1862 1863 /* zero out the mask elements we set */ 1864 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1865 } 1866 1867 /* create our matrix */ 1868 ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr); 1869 ierr = MatSetType(B,type);CHKERRQ(ierr); 1870 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,s_browlengths);CHKERRQ(ierr); 1871 a = (Mat_SeqSBAIJ*)B->data; 1872 1873 /* set matrix "i" values */ 1874 a->i[0] = 0; 1875 for (i=1; i<= mbs; i++) { 1876 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1877 a->ilen[i-1] = s_browlengths[i-1]; 1878 } 1879 a->nz = a->i[mbs]; 1880 1881 /* read in nonzero values */ 1882 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1883 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1884 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1885 1886 /* set "a" and "j" values into matrix */ 1887 nzcount = 0; jcount = 0; 1888 for (i=0; i<mbs; i++) { 1889 nzcountb = nzcount; 1890 nmask = 0; 1891 for (j=0; j<bs; j++) { 1892 kmax = rowlengths[i*bs+j]; 1893 for (k=0; k<kmax; k++) { 1894 tmp = jj[nzcount++]/bs; /* block col. index */ 1895 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1896 } 1897 } 1898 /* sort the masked values */ 1899 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1900 1901 /* set "j" values into matrix */ 1902 maskcount = 1; 1903 for (j=0; j<nmask; j++) { 1904 a->j[jcount++] = masked[j]; 1905 mask[masked[j]] = maskcount++; 1906 } 1907 1908 /* set "a" values into matrix */ 1909 ishift = bs2*a->i[i]; 1910 for (j=0; j<bs; j++) { 1911 kmax = rowlengths[i*bs+j]; 1912 for (k=0; k<kmax; k++) { 1913 tmp = jj[nzcountb]/bs ; /* block col. index */ 1914 if (tmp >= i){ 1915 block = mask[tmp] - 1; 1916 point = jj[nzcountb] - bs*tmp; 1917 idx = ishift + bs2*block + j + bs*point; 1918 a->a[idx] = aa[nzcountb]; 1919 } 1920 nzcountb++; 1921 } 1922 } 1923 /* zero out the mask elements we set */ 1924 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1925 } 1926 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1927 1928 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1929 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1930 ierr = PetscFree(aa);CHKERRQ(ierr); 1931 ierr = PetscFree(jj);CHKERRQ(ierr); 1932 ierr = PetscFree(mask);CHKERRQ(ierr); 1933 1934 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1935 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1936 ierr = MatView_Private(B);CHKERRQ(ierr); 1937 *A = B; 1938 PetscFunctionReturn(0); 1939 } 1940 1941 #undef __FUNCT__ 1942 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1943 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1944 { 1945 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1946 MatScalar *aa=a->a,*v,*v1; 1947 PetscScalar *x,*b,*t,sum,d; 1948 PetscErrorCode ierr; 1949 PetscInt m=a->mbs,bs=A->bs,*ai=a->i,*aj=a->j; 1950 PetscInt nz,nz1,*vj,*vj1,i; 1951 1952 PetscFunctionBegin; 1953 its = its*lits; 1954 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1955 1956 if (bs > 1) 1957 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1958 1959 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1960 if (xx != bb) { 1961 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1962 } else { 1963 b = x; 1964 } 1965 1966 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1967 1968 if (flag & SOR_ZERO_INITIAL_GUESS) { 1969 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1970 for (i=0; i<m; i++) 1971 t[i] = b[i]; 1972 1973 for (i=0; i<m; i++){ 1974 d = *(aa + ai[i]); /* diag[i] */ 1975 v = aa + ai[i] + 1; 1976 vj = aj + ai[i] + 1; 1977 nz = ai[i+1] - ai[i] - 1; 1978 x[i] = omega*t[i]/d; 1979 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 1980 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 1981 } 1982 } 1983 1984 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1985 for (i=0; i<m; i++) 1986 t[i] = b[i]; 1987 1988 for (i=0; i<m-1; i++){ /* update rhs */ 1989 v = aa + ai[i] + 1; 1990 vj = aj + ai[i] + 1; 1991 nz = ai[i+1] - ai[i] - 1; 1992 while (nz--) t[*vj++] -= x[i]*(*v++); 1993 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 1994 } 1995 for (i=m-1; i>=0; i--){ 1996 d = *(aa + ai[i]); 1997 v = aa + ai[i] + 1; 1998 vj = aj + ai[i] + 1; 1999 nz = ai[i+1] - ai[i] - 1; 2000 sum = t[i]; 2001 while (nz--) sum -= x[*vj++]*(*v++); 2002 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2003 x[i] = (1-omega)*x[i] + omega*sum/d; 2004 } 2005 } 2006 its--; 2007 } 2008 2009 while (its--) { 2010 /* 2011 forward sweep: 2012 for i=0,...,m-1: 2013 sum[i] = (b[i] - U(i,:)x )/d[i]; 2014 x[i] = (1-omega)x[i] + omega*sum[i]; 2015 b = b - x[i]*U^T(i,:); 2016 2017 */ 2018 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2019 for (i=0; i<m; i++) 2020 t[i] = b[i]; 2021 2022 for (i=0; i<m; i++){ 2023 d = *(aa + ai[i]); /* diag[i] */ 2024 v = aa + ai[i] + 1; v1=v; 2025 vj = aj + ai[i] + 1; vj1=vj; 2026 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2027 sum = t[i]; 2028 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2029 x[i] = (1-omega)*x[i] + omega*sum/d; 2030 while (nz--) t[*vj++] -= x[i]*(*v++); 2031 ierr = PetscLogFlops(4*nz-2);CHKERRQ(ierr); 2032 } 2033 } 2034 2035 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2036 /* 2037 backward sweep: 2038 b = b - x[i]*U^T(i,:), i=0,...,n-2 2039 for i=m-1,...,0: 2040 sum[i] = (b[i] - U(i,:)x )/d[i]; 2041 x[i] = (1-omega)x[i] + omega*sum[i]; 2042 */ 2043 for (i=0; i<m; i++) 2044 t[i] = b[i]; 2045 2046 for (i=0; i<m-1; i++){ /* update rhs */ 2047 v = aa + ai[i] + 1; 2048 vj = aj + ai[i] + 1; 2049 nz = ai[i+1] - ai[i] - 1; 2050 while (nz--) t[*vj++] -= x[i]*(*v++); 2051 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2052 } 2053 for (i=m-1; i>=0; i--){ 2054 d = *(aa + ai[i]); 2055 v = aa + ai[i] + 1; 2056 vj = aj + ai[i] + 1; 2057 nz = ai[i+1] - ai[i] - 1; 2058 sum = t[i]; 2059 while (nz--) sum -= x[*vj++]*(*v++); 2060 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2061 x[i] = (1-omega)*x[i] + omega*sum/d; 2062 } 2063 } 2064 } 2065 2066 ierr = PetscFree(t);CHKERRQ(ierr); 2067 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2068 if (bb != xx) { 2069 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2070 } 2071 PetscFunctionReturn(0); 2072 } 2073 2074 2075 2076 2077 2078 2079