1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the SBAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/inline/spops.h" 9 #include "src/mat/impls/sbaij/seq/sbaij.h" 10 11 #define CHUNKSIZE 10 12 13 /* 14 Checks for missing diagonals 15 */ 16 #undef __FUNCT__ 17 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 18 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A) 19 { 20 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 21 PetscErrorCode ierr; 22 PetscInt *diag,*jj = a->j,i; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 for (i=0; i<a->mbs; i++) { 28 if (jj[diag[i]] != i) SETERRQ1(PETSC_ERR_ARG_CORRUPT,"Matrix is missing diagonal number %D",i); 29 } 30 PetscFunctionReturn(0); 31 } 32 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 35 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A) 36 { 37 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 38 PetscErrorCode ierr; 39 PetscInt i,mbs = a->mbs; 40 41 PetscFunctionBegin; 42 if (a->diag) PetscFunctionReturn(0); 43 44 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 45 ierr = PetscLogObjectMemory(A,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 46 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 47 PetscFunctionReturn(0); 48 } 49 50 #undef __FUNCT__ 51 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 52 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 53 { 54 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 55 PetscInt n = a->mbs,i; 56 57 PetscFunctionBegin; 58 *nn = n; 59 if (!ia) PetscFunctionReturn(0); 60 61 if (oshift == 1) { 62 /* temporarily add 1 to i and j indices */ 63 PetscInt nz = a->i[n]; 64 for (i=0; i<nz; i++) a->j[i]++; 65 for (i=0; i<n+1; i++) a->i[i]++; 66 *ia = a->i; *ja = a->j; 67 } else { 68 *ia = a->i; *ja = a->j; 69 } 70 PetscFunctionReturn(0); 71 } 72 73 #undef __FUNCT__ 74 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 75 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 76 { 77 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 78 PetscInt i,n = a->mbs; 79 80 PetscFunctionBegin; 81 if (!ia) PetscFunctionReturn(0); 82 83 if (oshift == 1) { 84 PetscInt nz = a->i[n]-1; 85 for (i=0; i<nz; i++) a->j[i]--; 86 for (i=0; i<n+1; i++) a->i[i]--; 87 } 88 PetscFunctionReturn(0); 89 } 90 91 #undef __FUNCT__ 92 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 93 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A) 94 { 95 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 96 PetscErrorCode ierr; 97 98 PetscFunctionBegin; 99 PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->m,a->nz); 100 ierr = MatSeqXAIJFreeAIJ(a->singlemalloc,&a->a,&a->j,&a->i);CHKERRQ(ierr); 101 if (a->row) { 102 ierr = ISDestroy(a->row);CHKERRQ(ierr); 103 } 104 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 105 if (a->imax) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 106 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 107 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 108 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 109 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 110 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 111 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 112 113 if (a->inew){ 114 ierr = PetscFree(a->inew);CHKERRQ(ierr); 115 a->inew = 0; 116 } 117 ierr = PetscFree(a);CHKERRQ(ierr); 118 119 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 120 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 121 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 122 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 123 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr); 124 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 125 PetscFunctionReturn(0); 126 } 127 128 #undef __FUNCT__ 129 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 130 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op) 131 { 132 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 133 PetscErrorCode ierr; 134 135 PetscFunctionBegin; 136 switch (op) { 137 case MAT_ROW_ORIENTED: 138 a->roworiented = PETSC_TRUE; 139 break; 140 case MAT_COLUMN_ORIENTED: 141 a->roworiented = PETSC_FALSE; 142 break; 143 case MAT_COLUMNS_SORTED: 144 a->sorted = PETSC_TRUE; 145 break; 146 case MAT_COLUMNS_UNSORTED: 147 a->sorted = PETSC_FALSE; 148 break; 149 case MAT_KEEP_ZEROED_ROWS: 150 a->keepzeroedrows = PETSC_TRUE; 151 break; 152 case MAT_NO_NEW_NONZERO_LOCATIONS: 153 a->nonew = 1; 154 break; 155 case MAT_NEW_NONZERO_LOCATION_ERR: 156 a->nonew = -1; 157 break; 158 case MAT_NEW_NONZERO_ALLOCATION_ERR: 159 a->nonew = -2; 160 break; 161 case MAT_YES_NEW_NONZERO_LOCATIONS: 162 a->nonew = 0; 163 break; 164 case MAT_ROWS_SORTED: 165 case MAT_ROWS_UNSORTED: 166 case MAT_YES_NEW_DIAGONALS: 167 case MAT_IGNORE_OFF_PROC_ENTRIES: 168 case MAT_USE_HASH_TABLE: 169 ierr = PetscLogInfo((A,"MatSetOption_SeqSBAIJ:Option ignored\n"));CHKERRQ(ierr); 170 break; 171 case MAT_NO_NEW_DIAGONALS: 172 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 173 case MAT_NOT_SYMMETRIC: 174 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 175 case MAT_HERMITIAN: 176 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 177 case MAT_SYMMETRIC: 178 case MAT_STRUCTURALLY_SYMMETRIC: 179 case MAT_NOT_HERMITIAN: 180 case MAT_SYMMETRY_ETERNAL: 181 case MAT_NOT_SYMMETRY_ETERNAL: 182 case MAT_IGNORE_LOWER_TRIANGULAR: 183 a->ignore_ltriangular = PETSC_TRUE; 184 break; 185 case MAT_ERROR_LOWER_TRIANGULAR: 186 a->ignore_ltriangular = PETSC_FALSE; 187 break; 188 default: 189 SETERRQ(PETSC_ERR_SUP,"unknown option"); 190 } 191 PetscFunctionReturn(0); 192 } 193 194 #undef __FUNCT__ 195 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 196 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v) 197 { 198 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 199 PetscErrorCode ierr; 200 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2; 201 MatScalar *aa,*aa_i; 202 PetscScalar *v_i; 203 204 PetscFunctionBegin; 205 if (A) SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format"); 206 /* The old version below either gets the upper triangle part, or too expensive. 207 We take out the support for MatGetRow_SeqSBAIJ() */ 208 bs = A->bs; 209 ai = a->i; 210 aj = a->j; 211 aa = a->a; 212 bs2 = a->bs2; 213 214 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row); 215 216 bn = row/bs; /* Block number */ 217 bp = row % bs; /* Block position */ 218 M = ai[bn+1] - ai[bn]; 219 *ncols = bs*M; 220 221 if (v) { 222 *v = 0; 223 if (*ncols) { 224 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 225 for (i=0; i<M; i++) { /* for each block in the block row */ 226 v_i = *v + i*bs; 227 aa_i = aa + bs2*(ai[bn] + i); 228 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 229 } 230 } 231 } 232 233 if (cols) { 234 *cols = 0; 235 if (*ncols) { 236 ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr); 237 for (i=0; i<M; i++) { /* for each block in the block row */ 238 cols_i = *cols + i*bs; 239 itmp = bs*aj[ai[bn] + i]; 240 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 241 } 242 } 243 } 244 245 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 246 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 247 #ifdef column_search 248 v_i = *v + M*bs; 249 cols_i = *cols + M*bs; 250 for (i=0; i<bn; i++){ /* for each block row */ 251 M = ai[i+1] - ai[i]; 252 for (j=0; j<M; j++){ 253 itmp = aj[ai[i] + j]; /* block column value */ 254 if (itmp == bn){ 255 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 256 for (k=0; k<bs; k++) { 257 *cols_i++ = i*bs+k; 258 *v_i++ = aa_i[k]; 259 } 260 *ncols += bs; 261 break; 262 } 263 } 264 } 265 #endif 266 PetscFunctionReturn(0); 267 } 268 269 #undef __FUNCT__ 270 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 271 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 272 { 273 PetscErrorCode ierr; 274 275 PetscFunctionBegin; 276 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 277 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 278 PetscFunctionReturn(0); 279 } 280 281 #undef __FUNCT__ 282 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 283 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B) 284 { 285 PetscErrorCode ierr; 286 PetscFunctionBegin; 287 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 288 PetscFunctionReturn(0); 289 } 290 291 #undef __FUNCT__ 292 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 293 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 294 { 295 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 296 PetscErrorCode ierr; 297 PetscInt i,j,bs = A->bs,k,l,bs2=a->bs2; 298 const char *name; 299 PetscViewerFormat format; 300 301 PetscFunctionBegin; 302 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 303 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 304 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 305 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 306 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 307 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 308 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 309 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 310 for (i=0; i<a->mbs; i++) { 311 for (j=0; j<bs; j++) { 312 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 313 for (k=a->i[i]; k<a->i[i+1]; k++) { 314 for (l=0; l<bs; l++) { 315 #if defined(PETSC_USE_COMPLEX) 316 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 317 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 318 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 319 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 320 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 321 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 322 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 323 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 324 } 325 #else 326 if (a->a[bs2*k + l*bs + j] != 0.0) { 327 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 328 } 329 #endif 330 } 331 } 332 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 333 } 334 } 335 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 336 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 337 PetscFunctionReturn(0); 338 } else { 339 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 340 for (i=0; i<a->mbs; i++) { 341 for (j=0; j<bs; j++) { 342 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 343 for (k=a->i[i]; k<a->i[i+1]; k++) { 344 for (l=0; l<bs; l++) { 345 #if defined(PETSC_USE_COMPLEX) 346 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 347 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 348 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 349 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 350 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 351 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 352 } else { 353 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 354 } 355 #else 356 ierr = PetscViewerASCIIPrintf(viewer," %D %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 357 #endif 358 } 359 } 360 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 361 } 362 } 363 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 364 } 365 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 366 PetscFunctionReturn(0); 367 } 368 369 #undef __FUNCT__ 370 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 371 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 372 { 373 Mat A = (Mat) Aa; 374 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 375 PetscErrorCode ierr; 376 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2; 377 PetscMPIInt rank; 378 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 379 MatScalar *aa; 380 MPI_Comm comm; 381 PetscViewer viewer; 382 383 PetscFunctionBegin; 384 /* 385 This is nasty. If this is called from an originally parallel matrix 386 then all processes call this,but only the first has the matrix so the 387 rest should return immediately. 388 */ 389 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 390 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 391 if (rank) PetscFunctionReturn(0); 392 393 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 394 395 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 396 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 397 398 /* loop over matrix elements drawing boxes */ 399 color = PETSC_DRAW_BLUE; 400 for (i=0,row=0; i<mbs; i++,row+=bs) { 401 for (j=a->i[i]; j<a->i[i+1]; j++) { 402 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 403 x_l = a->j[j]*bs; x_r = x_l + 1.0; 404 aa = a->a + j*bs2; 405 for (k=0; k<bs; k++) { 406 for (l=0; l<bs; l++) { 407 if (PetscRealPart(*aa++) >= 0.) continue; 408 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 409 } 410 } 411 } 412 } 413 color = PETSC_DRAW_CYAN; 414 for (i=0,row=0; i<mbs; i++,row+=bs) { 415 for (j=a->i[i]; j<a->i[i+1]; j++) { 416 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 417 x_l = a->j[j]*bs; x_r = x_l + 1.0; 418 aa = a->a + j*bs2; 419 for (k=0; k<bs; k++) { 420 for (l=0; l<bs; l++) { 421 if (PetscRealPart(*aa++) != 0.) continue; 422 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 423 } 424 } 425 } 426 } 427 428 color = PETSC_DRAW_RED; 429 for (i=0,row=0; i<mbs; i++,row+=bs) { 430 for (j=a->i[i]; j<a->i[i+1]; j++) { 431 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 432 x_l = a->j[j]*bs; x_r = x_l + 1.0; 433 aa = a->a + j*bs2; 434 for (k=0; k<bs; k++) { 435 for (l=0; l<bs; l++) { 436 if (PetscRealPart(*aa++) <= 0.) continue; 437 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 438 } 439 } 440 } 441 } 442 PetscFunctionReturn(0); 443 } 444 445 #undef __FUNCT__ 446 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 447 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 448 { 449 PetscErrorCode ierr; 450 PetscReal xl,yl,xr,yr,w,h; 451 PetscDraw draw; 452 PetscTruth isnull; 453 454 PetscFunctionBegin; 455 456 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 457 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 458 459 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 460 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 461 xr += w; yr += h; xl = -w; yl = -h; 462 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 463 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 464 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 465 PetscFunctionReturn(0); 466 } 467 468 #undef __FUNCT__ 469 #define __FUNCT__ "MatView_SeqSBAIJ" 470 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 471 { 472 PetscErrorCode ierr; 473 PetscTruth iascii,isdraw; 474 475 PetscFunctionBegin; 476 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 477 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 478 if (iascii){ 479 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 480 } else if (isdraw) { 481 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 482 } else { 483 Mat B; 484 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 485 ierr = MatView(B,viewer);CHKERRQ(ierr); 486 ierr = MatDestroy(B);CHKERRQ(ierr); 487 } 488 PetscFunctionReturn(0); 489 } 490 491 492 #undef __FUNCT__ 493 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 494 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 495 { 496 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 497 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 498 PetscInt *ai = a->i,*ailen = a->ilen; 499 PetscInt brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2; 500 MatScalar *ap,*aa = a->a,zero = 0.0; 501 502 PetscFunctionBegin; 503 for (k=0; k<m; k++) { /* loop over rows */ 504 row = im[k]; brow = row/bs; 505 if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row); 506 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 507 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 508 nrow = ailen[brow]; 509 for (l=0; l<n; l++) { /* loop over columns */ 510 if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]); 511 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1); 512 col = in[l] ; 513 bcol = col/bs; 514 cidx = col%bs; 515 ridx = row%bs; 516 high = nrow; 517 low = 0; /* assume unsorted */ 518 while (high-low > 5) { 519 t = (low+high)/2; 520 if (rp[t] > bcol) high = t; 521 else low = t; 522 } 523 for (i=low; i<high; i++) { 524 if (rp[i] > bcol) break; 525 if (rp[i] == bcol) { 526 *v++ = ap[bs2*i+bs*cidx+ridx]; 527 goto finished; 528 } 529 } 530 *v++ = zero; 531 finished:; 532 } 533 } 534 PetscFunctionReturn(0); 535 } 536 537 538 #undef __FUNCT__ 539 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 540 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 541 { 542 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 543 PetscErrorCode ierr; 544 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 545 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 546 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval; 547 PetscTruth roworiented=a->roworiented; 548 const MatScalar *value = v; 549 MatScalar *ap,*aa = a->a,*bap; 550 551 PetscFunctionBegin; 552 if (roworiented) { 553 stepval = (n-1)*bs; 554 } else { 555 stepval = (m-1)*bs; 556 } 557 for (k=0; k<m; k++) { /* loop over added rows */ 558 row = im[k]; 559 if (row < 0) continue; 560 #if defined(PETSC_USE_DEBUG) 561 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 562 #endif 563 rp = aj + ai[row]; 564 ap = aa + bs2*ai[row]; 565 rmax = imax[row]; 566 nrow = ailen[row]; 567 low = 0; 568 high = nrow; 569 for (l=0; l<n; l++) { /* loop over added columns */ 570 if (in[l] < 0) continue; 571 col = in[l]; 572 #if defined(PETSC_USE_DEBUG) 573 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1); 574 #endif 575 if (col < row) continue; /* ignore lower triangular block */ 576 if (roworiented) { 577 value = v + k*(stepval+bs)*bs + l*bs; 578 } else { 579 value = v + l*(stepval+bs)*bs + k*bs; 580 } 581 if (col <= lastcol) low = 0; else high = nrow; 582 lastcol = col; 583 while (high-low > 7) { 584 t = (low+high)/2; 585 if (rp[t] > col) high = t; 586 else low = t; 587 } 588 for (i=low; i<high; i++) { 589 if (rp[i] > col) break; 590 if (rp[i] == col) { 591 bap = ap + bs2*i; 592 if (roworiented) { 593 if (is == ADD_VALUES) { 594 for (ii=0; ii<bs; ii++,value+=stepval) { 595 for (jj=ii; jj<bs2; jj+=bs) { 596 bap[jj] += *value++; 597 } 598 } 599 } else { 600 for (ii=0; ii<bs; ii++,value+=stepval) { 601 for (jj=ii; jj<bs2; jj+=bs) { 602 bap[jj] = *value++; 603 } 604 } 605 } 606 } else { 607 if (is == ADD_VALUES) { 608 for (ii=0; ii<bs; ii++,value+=stepval) { 609 for (jj=0; jj<bs; jj++) { 610 *bap++ += *value++; 611 } 612 } 613 } else { 614 for (ii=0; ii<bs; ii++,value+=stepval) { 615 for (jj=0; jj<bs; jj++) { 616 *bap++ = *value++; 617 } 618 } 619 } 620 } 621 goto noinsert2; 622 } 623 } 624 if (nonew == 1) goto noinsert2; 625 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 626 MatSeqXAIJReallocateAIJ(a,bs2,nrow,row,col,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew); 627 N = nrow++ - 1; 628 /* shift up all the later entries in this row */ 629 for (ii=N; ii>=i; ii--) { 630 rp[ii+1] = rp[ii]; 631 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 632 } 633 if (N >= i) { 634 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 635 } 636 rp[i] = col; 637 bap = ap + bs2*i; 638 if (roworiented) { 639 for (ii=0; ii<bs; ii++,value+=stepval) { 640 for (jj=ii; jj<bs2; jj+=bs) { 641 bap[jj] = *value++; 642 } 643 } 644 } else { 645 for (ii=0; ii<bs; ii++,value+=stepval) { 646 for (jj=0; jj<bs; jj++) { 647 *bap++ = *value++; 648 } 649 } 650 } 651 noinsert2:; 652 low = i; 653 } 654 ailen[row] = nrow; 655 } 656 PetscFunctionReturn(0); 657 } 658 659 #undef __FUNCT__ 660 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 661 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 662 { 663 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 664 PetscErrorCode ierr; 665 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 666 PetscInt m = A->m,*ip,N,*ailen = a->ilen; 667 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 668 MatScalar *aa = a->a,*ap; 669 670 PetscFunctionBegin; 671 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 672 673 if (m) rmax = ailen[0]; 674 for (i=1; i<mbs; i++) { 675 /* move each row back by the amount of empty slots (fshift) before it*/ 676 fshift += imax[i-1] - ailen[i-1]; 677 rmax = PetscMax(rmax,ailen[i]); 678 if (fshift) { 679 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 680 N = ailen[i]; 681 for (j=0; j<N; j++) { 682 ip[j-fshift] = ip[j]; 683 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 684 } 685 } 686 ai[i] = ai[i-1] + ailen[i-1]; 687 } 688 if (mbs) { 689 fshift += imax[mbs-1] - ailen[mbs-1]; 690 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 691 } 692 /* reset ilen and imax for each row */ 693 for (i=0; i<mbs; i++) { 694 ailen[i] = imax[i] = ai[i+1] - ai[i]; 695 } 696 a->nz = ai[mbs]; 697 698 /* diagonals may have moved, reset it */ 699 if (a->diag) { 700 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 701 } 702 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n", 703 m,A->m,A->bs,fshift*bs2,a->nz*bs2));CHKERRQ(ierr); 704 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs));CHKERRQ(ierr); 705 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %D\n",rmax));CHKERRQ(ierr); 706 a->reallocs = 0; 707 A->info.nz_unneeded = (PetscReal)fshift*bs2; 708 PetscFunctionReturn(0); 709 } 710 711 /* 712 This function returns an array of flags which indicate the locations of contiguous 713 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 714 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 715 Assume: sizes should be long enough to hold all the values. 716 */ 717 #undef __FUNCT__ 718 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 719 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 720 { 721 PetscInt i,j,k,row; 722 PetscTruth flg; 723 724 PetscFunctionBegin; 725 for (i=0,j=0; i<n; j++) { 726 row = idx[i]; 727 if (row%bs!=0) { /* Not the begining of a block */ 728 sizes[j] = 1; 729 i++; 730 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 731 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 732 i++; 733 } else { /* Begining of the block, so check if the complete block exists */ 734 flg = PETSC_TRUE; 735 for (k=1; k<bs; k++) { 736 if (row+k != idx[i+k]) { /* break in the block */ 737 flg = PETSC_FALSE; 738 break; 739 } 740 } 741 if (flg) { /* No break in the bs */ 742 sizes[j] = bs; 743 i+= bs; 744 } else { 745 sizes[j] = 1; 746 i++; 747 } 748 } 749 } 750 *bs_max = j; 751 PetscFunctionReturn(0); 752 } 753 754 755 /* Only add/insert a(i,j) with i<=j (blocks). 756 Any a(i,j) with i>j input by user is ingored. 757 */ 758 759 #undef __FUNCT__ 760 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 761 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 762 { 763 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 764 PetscErrorCode ierr; 765 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 766 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 767 PetscInt *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol; 768 PetscInt ridx,cidx,bs2=a->bs2; 769 MatScalar *ap,value,*aa=a->a,*bap; 770 771 PetscFunctionBegin; 772 for (k=0; k<m; k++) { /* loop over added rows */ 773 row = im[k]; /* row number */ 774 brow = row/bs; /* block row number */ 775 if (row < 0) continue; 776 #if defined(PETSC_USE_DEBUG) 777 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 778 #endif 779 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 780 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 781 rmax = imax[brow]; /* maximum space allocated for this row */ 782 nrow = ailen[brow]; /* actual length of this row */ 783 low = 0; 784 785 for (l=0; l<n; l++) { /* loop over added columns */ 786 if (in[l] < 0) continue; 787 #if defined(PETSC_USE_DEBUG) 788 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->m-1); 789 #endif 790 col = in[l]; 791 bcol = col/bs; /* block col number */ 792 793 if (brow > bcol) { 794 if (a->ignore_ltriangular){ 795 continue; /* ignore lower triangular values */ 796 } else { 797 SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR)"); 798 } 799 } 800 801 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 802 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 803 /* element value a(k,l) */ 804 if (roworiented) { 805 value = v[l + k*n]; 806 } else { 807 value = v[k + l*m]; 808 } 809 810 /* move pointer bap to a(k,l) quickly and add/insert value */ 811 if (col <= lastcol) low = 0; high = nrow; 812 lastcol = col; 813 while (high-low > 7) { 814 t = (low+high)/2; 815 if (rp[t] > bcol) high = t; 816 else low = t; 817 } 818 for (i=low; i<high; i++) { 819 if (rp[i] > bcol) break; 820 if (rp[i] == bcol) { 821 bap = ap + bs2*i + bs*cidx + ridx; 822 if (is == ADD_VALUES) *bap += value; 823 else *bap = value; 824 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 825 if (brow == bcol && ridx < cidx){ 826 bap = ap + bs2*i + bs*ridx + cidx; 827 if (is == ADD_VALUES) *bap += value; 828 else *bap = value; 829 } 830 goto noinsert1; 831 } 832 } 833 834 if (nonew == 1) goto noinsert1; 835 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 836 MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew); 837 838 N = nrow++ - 1; 839 /* shift up all the later entries in this row */ 840 for (ii=N; ii>=i; ii--) { 841 rp[ii+1] = rp[ii]; 842 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 843 } 844 if (N>=i) { 845 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 846 } 847 rp[i] = bcol; 848 ap[bs2*i + bs*cidx + ridx] = value; 849 noinsert1:; 850 low = i; 851 } 852 } /* end of loop over added columns */ 853 ailen[brow] = nrow; 854 } /* end of loop over added rows */ 855 PetscFunctionReturn(0); 856 } 857 858 EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*); 859 EXTERN PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*); 860 EXTERN PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat,PetscInt,IS[],PetscInt); 861 EXTERN PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,PetscInt,MatReuse,Mat*); 862 EXTERN PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat*[]); 863 EXTERN PetscErrorCode MatScale_SeqSBAIJ(Mat,PetscScalar); 864 EXTERN PetscErrorCode MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 865 EXTERN PetscErrorCode MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 866 EXTERN PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat,Vec); 867 EXTERN PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 868 EXTERN PetscErrorCode MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 869 EXTERN PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat); 870 EXTERN PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat,Vec); 871 872 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 873 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 874 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 875 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 876 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 877 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 878 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 879 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 880 881 EXTERN PetscErrorCode MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs); 882 883 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 884 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 885 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 886 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 887 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 888 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 889 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 890 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 891 892 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,MatFactorInfo*,Mat*); 893 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,MatFactorInfo*,Mat*); 894 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,MatFactorInfo*,Mat*); 895 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,MatFactorInfo*,Mat*); 896 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,MatFactorInfo*,Mat*); 897 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,MatFactorInfo*,Mat*); 898 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,MatFactorInfo*,Mat*); 899 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,MatFactorInfo*,Mat*); 900 EXTERN PetscErrorCode MatGetInertia_SeqSBAIJ(Mat,PetscInt*,PetscInt*,PetscInt*); 901 902 EXTERN PetscErrorCode MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 903 EXTERN PetscErrorCode MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 904 EXTERN PetscErrorCode MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 905 EXTERN PetscErrorCode MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 906 EXTERN PetscErrorCode MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 907 EXTERN PetscErrorCode MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 908 EXTERN PetscErrorCode MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 909 EXTERN PetscErrorCode MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 910 911 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 912 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 913 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 914 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 915 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 916 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 917 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 918 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 919 920 #ifdef HAVE_MatICCFactor 921 /* modified from MatILUFactor_SeqSBAIJ, needs further work! */ 922 #undef __FUNCT__ 923 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 924 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 925 { 926 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 927 Mat outA; 928 PetscErrorCode ierr; 929 PetscTruth row_identity,col_identity; 930 931 PetscFunctionBegin; 932 outA = inA; 933 inA->factor = FACTOR_CHOLESKY; 934 935 if (!a->diag) { 936 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 937 } 938 /* 939 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 940 for ILU(0) factorization with natural ordering 941 */ 942 switch (a->bs) { 943 case 1: 944 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 945 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 946 inA->ops->solves = MatSolves_SeqSBAIJ_1; 947 ierr = PetscLoginfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"));CHKERRQ(ierr); 948 case 2: 949 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 950 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 951 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 952 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"));CHKERRQ(ierr); 953 break; 954 case 3: 955 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 956 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 957 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 958 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"));CHKERRQ(ierr); 959 break; 960 case 4: 961 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 962 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 963 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 964 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"));CHKERRQ(ierr); 965 break; 966 case 5: 967 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 968 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 969 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 970 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"));CHKERRQ(ierr); 971 break; 972 case 6: 973 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 974 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 975 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 976 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"));CHKERRQ(ierr); 977 break; 978 case 7: 979 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 980 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 981 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 982 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"));CHKERRQ(ierr); 983 break; 984 default: 985 a->row = row; 986 a->icol = col; 987 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 988 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 989 990 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 991 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 992 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 993 994 if (!a->solve_work) { 995 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 996 ierr = PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 997 } 998 } 999 1000 ierr = MatCholeskyFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1001 PetscFunctionReturn(0); 1002 } 1003 #endif 1004 1005 #undef __FUNCT__ 1006 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1007 PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat A) 1008 { 1009 static PetscTruth called = PETSC_FALSE; 1010 MPI_Comm comm = A->comm; 1011 PetscErrorCode ierr; 1012 1013 PetscFunctionBegin; 1014 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1015 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1016 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1017 ierr = (*PetscHelpPrintf)(comm," -mat_ignore_ltriangular: Ignore lower triangular values set by user\n");CHKERRQ(ierr); 1018 PetscFunctionReturn(0); 1019 } 1020 1021 EXTERN_C_BEGIN 1022 #undef __FUNCT__ 1023 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1024 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices) 1025 { 1026 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1027 PetscInt i,nz,n; 1028 1029 PetscFunctionBegin; 1030 nz = baij->maxnz; 1031 n = mat->n; 1032 for (i=0; i<nz; i++) { 1033 baij->j[i] = indices[i]; 1034 } 1035 baij->nz = nz; 1036 for (i=0; i<n; i++) { 1037 baij->ilen[i] = baij->imax[i]; 1038 } 1039 PetscFunctionReturn(0); 1040 } 1041 EXTERN_C_END 1042 1043 #undef __FUNCT__ 1044 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1045 /*@ 1046 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1047 in the matrix. 1048 1049 Input Parameters: 1050 + mat - the SeqSBAIJ matrix 1051 - indices - the column indices 1052 1053 Level: advanced 1054 1055 Notes: 1056 This can be called if you have precomputed the nonzero structure of the 1057 matrix and want to provide it to the matrix object to improve the performance 1058 of the MatSetValues() operation. 1059 1060 You MUST have set the correct numbers of nonzeros per row in the call to 1061 MatCreateSeqSBAIJ(), and the columns indices MUST be sorted. 1062 1063 MUST be called before any calls to MatSetValues() 1064 1065 .seealso: MatCreateSeqSBAIJ 1066 @*/ 1067 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1068 { 1069 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1070 1071 PetscFunctionBegin; 1072 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1073 PetscValidPointer(indices,2); 1074 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1075 if (f) { 1076 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1077 } else { 1078 SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices"); 1079 } 1080 PetscFunctionReturn(0); 1081 } 1082 1083 #undef __FUNCT__ 1084 #define __FUNCT__ "MatCopy_SeqSBAIJ" 1085 PetscErrorCode MatCopy_SeqSBAIJ(Mat A,Mat B,MatStructure str) 1086 { 1087 PetscErrorCode ierr; 1088 1089 PetscFunctionBegin; 1090 /* If the two matrices have the same copy implementation, use fast copy. */ 1091 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1092 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1093 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1094 1095 if (a->i[A->m] != b->i[B->m]) { 1096 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1097 } 1098 ierr = PetscMemcpy(b->a,a->a,(a->i[A->m])*sizeof(PetscScalar));CHKERRQ(ierr); 1099 } else { 1100 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1101 } 1102 PetscFunctionReturn(0); 1103 } 1104 1105 #undef __FUNCT__ 1106 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1107 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A) 1108 { 1109 PetscErrorCode ierr; 1110 1111 PetscFunctionBegin; 1112 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1113 PetscFunctionReturn(0); 1114 } 1115 1116 #undef __FUNCT__ 1117 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1118 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1119 { 1120 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1121 PetscFunctionBegin; 1122 *array = a->a; 1123 PetscFunctionReturn(0); 1124 } 1125 1126 #undef __FUNCT__ 1127 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1128 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1129 { 1130 PetscFunctionBegin; 1131 PetscFunctionReturn(0); 1132 } 1133 1134 #include "petscblaslapack.h" 1135 #undef __FUNCT__ 1136 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1137 PetscErrorCode MatAXPY_SeqSBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1138 { 1139 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1140 PetscErrorCode ierr; 1141 PetscInt i,bs=Y->bs,bs2,j; 1142 PetscBLASInt bnz = (PetscBLASInt)x->nz,one = 1; 1143 1144 PetscFunctionBegin; 1145 if (str == SAME_NONZERO_PATTERN) { 1146 PetscScalar alpha = a; 1147 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1148 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1149 if (y->xtoy && y->XtoY != X) { 1150 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1151 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1152 } 1153 if (!y->xtoy) { /* get xtoy */ 1154 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1155 y->XtoY = X; 1156 } 1157 bs2 = bs*bs; 1158 for (i=0; i<x->nz; i++) { 1159 j = 0; 1160 while (j < bs2){ 1161 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 1162 j++; 1163 } 1164 } 1165 ierr = PetscLogInfo((0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr); 1166 } else { 1167 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1168 } 1169 PetscFunctionReturn(0); 1170 } 1171 1172 #undef __FUNCT__ 1173 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1174 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1175 { 1176 PetscFunctionBegin; 1177 *flg = PETSC_TRUE; 1178 PetscFunctionReturn(0); 1179 } 1180 1181 #undef __FUNCT__ 1182 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1183 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1184 { 1185 PetscFunctionBegin; 1186 *flg = PETSC_TRUE; 1187 PetscFunctionReturn(0); 1188 } 1189 1190 #undef __FUNCT__ 1191 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1192 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg) 1193 { 1194 PetscFunctionBegin; 1195 *flg = PETSC_FALSE; 1196 PetscFunctionReturn(0); 1197 } 1198 1199 #undef __FUNCT__ 1200 #define __FUNCT__ "MatRealPart_SeqSBAIJ" 1201 PetscErrorCode MatRealPart_SeqSBAIJ(Mat A) 1202 { 1203 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1204 PetscInt i,nz = a->bs2*a->i[a->mbs]; 1205 PetscScalar *aa = a->a; 1206 1207 PetscFunctionBegin; 1208 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 1209 PetscFunctionReturn(0); 1210 } 1211 1212 #undef __FUNCT__ 1213 #define __FUNCT__ "MatImaginaryPart_SeqSBAIJ" 1214 PetscErrorCode MatImaginaryPart_SeqSBAIJ(Mat A) 1215 { 1216 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1217 PetscInt i,nz = a->bs2*a->i[a->mbs]; 1218 PetscScalar *aa = a->a; 1219 1220 PetscFunctionBegin; 1221 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 1222 PetscFunctionReturn(0); 1223 } 1224 1225 /* -------------------------------------------------------------------*/ 1226 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1227 MatGetRow_SeqSBAIJ, 1228 MatRestoreRow_SeqSBAIJ, 1229 MatMult_SeqSBAIJ_N, 1230 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1231 MatMult_SeqSBAIJ_N, /* transpose versions are same as non-transpose versions */ 1232 MatMultAdd_SeqSBAIJ_N, 1233 MatSolve_SeqSBAIJ_N, 1234 0, 1235 0, 1236 /*10*/ 0, 1237 0, 1238 MatCholeskyFactor_SeqSBAIJ, 1239 MatRelax_SeqSBAIJ, 1240 MatTranspose_SeqSBAIJ, 1241 /*15*/ MatGetInfo_SeqSBAIJ, 1242 MatEqual_SeqSBAIJ, 1243 MatGetDiagonal_SeqSBAIJ, 1244 MatDiagonalScale_SeqSBAIJ, 1245 MatNorm_SeqSBAIJ, 1246 /*20*/ 0, 1247 MatAssemblyEnd_SeqSBAIJ, 1248 0, 1249 MatSetOption_SeqSBAIJ, 1250 MatZeroEntries_SeqSBAIJ, 1251 /*25*/ 0, 1252 0, 1253 0, 1254 MatCholeskyFactorSymbolic_SeqSBAIJ, 1255 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1256 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1257 0, 1258 MatICCFactorSymbolic_SeqSBAIJ, 1259 MatGetArray_SeqSBAIJ, 1260 MatRestoreArray_SeqSBAIJ, 1261 /*35*/ MatDuplicate_SeqSBAIJ, 1262 0, 1263 0, 1264 0, 1265 0, 1266 /*40*/ MatAXPY_SeqSBAIJ, 1267 MatGetSubMatrices_SeqSBAIJ, 1268 MatIncreaseOverlap_SeqSBAIJ, 1269 MatGetValues_SeqSBAIJ, 1270 MatCopy_SeqSBAIJ, 1271 /*45*/ MatPrintHelp_SeqSBAIJ, 1272 MatScale_SeqSBAIJ, 1273 0, 1274 0, 1275 0, 1276 /*50*/ 0, 1277 MatGetRowIJ_SeqSBAIJ, 1278 MatRestoreRowIJ_SeqSBAIJ, 1279 0, 1280 0, 1281 /*55*/ 0, 1282 0, 1283 0, 1284 0, 1285 MatSetValuesBlocked_SeqSBAIJ, 1286 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1287 0, 1288 0, 1289 MatGetPetscMaps_Petsc, 1290 0, 1291 /*65*/ 0, 1292 0, 1293 0, 1294 0, 1295 0, 1296 /*70*/ MatGetRowMax_SeqSBAIJ, 1297 0, 1298 0, 1299 0, 1300 0, 1301 /*75*/ 0, 1302 0, 1303 0, 1304 0, 1305 0, 1306 /*80*/ 0, 1307 0, 1308 0, 1309 #if !defined(PETSC_USE_COMPLEX) 1310 MatGetInertia_SeqSBAIJ, 1311 #else 1312 0, 1313 #endif 1314 MatLoad_SeqSBAIJ, 1315 /*85*/ MatIsSymmetric_SeqSBAIJ, 1316 MatIsHermitian_SeqSBAIJ, 1317 MatIsStructurallySymmetric_SeqSBAIJ, 1318 0, 1319 0, 1320 /*90*/ 0, 1321 0, 1322 0, 1323 0, 1324 0, 1325 /*95*/ 0, 1326 0, 1327 0, 1328 0, 1329 0, 1330 /*100*/0, 1331 0, 1332 0, 1333 0, 1334 0, 1335 /*105*/0, 1336 MatRealPart_SeqSBAIJ, 1337 MatImaginaryPart_SeqSBAIJ 1338 }; 1339 1340 EXTERN_C_BEGIN 1341 #undef __FUNCT__ 1342 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1343 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqSBAIJ(Mat mat) 1344 { 1345 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1346 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 1347 PetscErrorCode ierr; 1348 1349 PetscFunctionBegin; 1350 if (aij->nonew != 1) { 1351 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1352 } 1353 1354 /* allocate space for values if not already there */ 1355 if (!aij->saved_values) { 1356 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1357 } 1358 1359 /* copy values over */ 1360 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1361 PetscFunctionReturn(0); 1362 } 1363 EXTERN_C_END 1364 1365 EXTERN_C_BEGIN 1366 #undef __FUNCT__ 1367 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1368 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqSBAIJ(Mat mat) 1369 { 1370 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1371 PetscErrorCode ierr; 1372 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 1373 1374 PetscFunctionBegin; 1375 if (aij->nonew != 1) { 1376 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1377 } 1378 if (!aij->saved_values) { 1379 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1380 } 1381 1382 /* copy values over */ 1383 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1384 PetscFunctionReturn(0); 1385 } 1386 EXTERN_C_END 1387 1388 EXTERN_C_BEGIN 1389 #undef __FUNCT__ 1390 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1391 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 1392 { 1393 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1394 PetscErrorCode ierr; 1395 PetscInt i,mbs,bs2; 1396 PetscTruth skipallocation = PETSC_FALSE,flg; 1397 1398 PetscFunctionBegin; 1399 B->preallocated = PETSC_TRUE; 1400 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1401 mbs = B->m/bs; 1402 bs2 = bs*bs; 1403 1404 if (mbs*bs != B->m) { 1405 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1406 } 1407 1408 if (nz == MAT_SKIP_ALLOCATION) { 1409 skipallocation = PETSC_TRUE; 1410 nz = 0; 1411 } 1412 1413 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1414 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 1415 if (nnz) { 1416 for (i=0; i<mbs; i++) { 1417 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 1418 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs); 1419 } 1420 } 1421 1422 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1423 if (!flg) { 1424 switch (bs) { 1425 case 1: 1426 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1427 B->ops->solve = MatSolve_SeqSBAIJ_1; 1428 B->ops->solves = MatSolves_SeqSBAIJ_1; 1429 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1430 B->ops->mult = MatMult_SeqSBAIJ_1; 1431 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1432 B->ops->multtranspose = MatMult_SeqSBAIJ_1; 1433 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_1; 1434 break; 1435 case 2: 1436 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1437 B->ops->solve = MatSolve_SeqSBAIJ_2; 1438 B->ops->solvetranspose = MatSolve_SeqSBAIJ_2; 1439 B->ops->mult = MatMult_SeqSBAIJ_2; 1440 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1441 B->ops->multtranspose = MatMult_SeqSBAIJ_2; 1442 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_2; 1443 break; 1444 case 3: 1445 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1446 B->ops->solve = MatSolve_SeqSBAIJ_3; 1447 B->ops->solvetranspose = MatSolve_SeqSBAIJ_3; 1448 B->ops->mult = MatMult_SeqSBAIJ_3; 1449 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1450 B->ops->multtranspose = MatMult_SeqSBAIJ_3; 1451 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_3; 1452 break; 1453 case 4: 1454 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1455 B->ops->solve = MatSolve_SeqSBAIJ_4; 1456 B->ops->solvetranspose = MatSolve_SeqSBAIJ_4; 1457 B->ops->mult = MatMult_SeqSBAIJ_4; 1458 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1459 B->ops->multtranspose = MatMult_SeqSBAIJ_4; 1460 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_4; 1461 break; 1462 case 5: 1463 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1464 B->ops->solve = MatSolve_SeqSBAIJ_5; 1465 B->ops->solvetranspose = MatSolve_SeqSBAIJ_5; 1466 B->ops->mult = MatMult_SeqSBAIJ_5; 1467 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1468 B->ops->multtranspose = MatMult_SeqSBAIJ_5; 1469 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_5; 1470 break; 1471 case 6: 1472 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1473 B->ops->solve = MatSolve_SeqSBAIJ_6; 1474 B->ops->solvetranspose = MatSolve_SeqSBAIJ_6; 1475 B->ops->mult = MatMult_SeqSBAIJ_6; 1476 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1477 B->ops->multtranspose = MatMult_SeqSBAIJ_6; 1478 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_6; 1479 break; 1480 case 7: 1481 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1482 B->ops->solve = MatSolve_SeqSBAIJ_7; 1483 B->ops->solvetranspose = MatSolve_SeqSBAIJ_7; 1484 B->ops->mult = MatMult_SeqSBAIJ_7; 1485 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1486 B->ops->multtranspose = MatMult_SeqSBAIJ_7; 1487 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_7; 1488 break; 1489 } 1490 } 1491 1492 b->mbs = mbs; 1493 b->nbs = mbs; 1494 if (!skipallocation) { 1495 /* b->ilen will count nonzeros in each block row so far. */ 1496 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 1497 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1498 if (!nnz) { 1499 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1500 else if (nz <= 0) nz = 1; 1501 for (i=0; i<mbs; i++) { 1502 b->imax[i] = nz; 1503 } 1504 nz = nz*mbs; /* total nz */ 1505 } else { 1506 nz = 0; 1507 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1508 } 1509 /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1510 1511 /* allocate the matrix space */ 1512 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->m+1,PetscInt,&b->i);CHKERRQ(ierr); 1513 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1514 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1515 b->singlemalloc = PETSC_TRUE; 1516 1517 /* pointer to beginning of each row */ 1518 b->i[0] = 0; 1519 for (i=1; i<mbs+1; i++) { 1520 b->i[i] = b->i[i-1] + b->imax[i-1]; 1521 } 1522 } 1523 1524 B->bs = bs; 1525 b->bs2 = bs2; 1526 b->nz = 0; 1527 b->maxnz = nz*bs2; 1528 1529 b->inew = 0; 1530 b->jnew = 0; 1531 b->anew = 0; 1532 b->a2anew = 0; 1533 b->permute = PETSC_FALSE; 1534 PetscFunctionReturn(0); 1535 } 1536 EXTERN_C_END 1537 1538 EXTERN_C_BEGIN 1539 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 1540 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqBAIJ(Mat, MatType,MatReuse,Mat*); 1541 EXTERN_C_END 1542 1543 /*MC 1544 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1545 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1546 1547 Options Database Keys: 1548 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1549 1550 Level: beginner 1551 1552 .seealso: MatCreateSeqSBAIJ 1553 M*/ 1554 1555 EXTERN_C_BEGIN 1556 #undef __FUNCT__ 1557 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1558 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqSBAIJ(Mat B) 1559 { 1560 Mat_SeqSBAIJ *b; 1561 PetscErrorCode ierr; 1562 PetscMPIInt size; 1563 PetscTruth flg; 1564 1565 PetscFunctionBegin; 1566 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1567 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1568 B->m = B->M = PetscMax(B->m,B->M); 1569 B->n = B->N = PetscMax(B->n,B->N); 1570 1571 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1572 B->data = (void*)b; 1573 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1574 B->ops->destroy = MatDestroy_SeqSBAIJ; 1575 B->ops->view = MatView_SeqSBAIJ; 1576 B->factor = 0; 1577 B->mapping = 0; 1578 b->row = 0; 1579 b->icol = 0; 1580 b->reallocs = 0; 1581 b->saved_values = 0; 1582 1583 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1584 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1585 1586 b->sorted = PETSC_FALSE; 1587 b->roworiented = PETSC_TRUE; 1588 b->nonew = 0; 1589 b->diag = 0; 1590 b->solve_work = 0; 1591 b->mult_work = 0; 1592 B->spptr = 0; 1593 b->keepzeroedrows = PETSC_FALSE; 1594 b->xtoy = 0; 1595 b->XtoY = 0; 1596 1597 b->inew = 0; 1598 b->jnew = 0; 1599 b->anew = 0; 1600 b->a2anew = 0; 1601 b->permute = PETSC_FALSE; 1602 1603 b->ignore_ltriangular = PETSC_FALSE; 1604 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_ignore_lower_triangular",&flg);CHKERRQ(ierr); 1605 if (flg) b->ignore_ltriangular = PETSC_TRUE; 1606 1607 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1608 "MatStoreValues_SeqSBAIJ", 1609 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1610 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1611 "MatRetrieveValues_SeqSBAIJ", 1612 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1613 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1614 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1615 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1616 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C", 1617 "MatConvert_SeqSBAIJ_SeqAIJ", 1618 MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr); 1619 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C", 1620 "MatConvert_SeqSBAIJ_SeqBAIJ", 1621 MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr); 1622 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1623 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1624 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1625 1626 B->symmetric = PETSC_TRUE; 1627 B->structurally_symmetric = PETSC_TRUE; 1628 B->symmetric_set = PETSC_TRUE; 1629 B->structurally_symmetric_set = PETSC_TRUE; 1630 PetscFunctionReturn(0); 1631 } 1632 EXTERN_C_END 1633 1634 #undef __FUNCT__ 1635 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1636 /*@C 1637 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1638 compressed row) format. For good matrix assembly performance the 1639 user should preallocate the matrix storage by setting the parameter nz 1640 (or the array nnz). By setting these parameters accurately, performance 1641 during matrix assembly can be increased by more than a factor of 50. 1642 1643 Collective on Mat 1644 1645 Input Parameters: 1646 + A - the symmetric matrix 1647 . bs - size of block 1648 . nz - number of block nonzeros per block row (same for all rows) 1649 - nnz - array containing the number of block nonzeros in the upper triangular plus 1650 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1651 1652 Options Database Keys: 1653 . -mat_no_unroll - uses code that does not unroll the loops in the 1654 block calculations (much slower) 1655 . -mat_block_size - size of the blocks to use 1656 1657 Level: intermediate 1658 1659 Notes: 1660 Specify the preallocated storage with either nz or nnz (not both). 1661 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1662 allocation. For additional details, see the users manual chapter on 1663 matrices. 1664 1665 If the nnz parameter is given then the nz parameter is ignored 1666 1667 1668 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1669 @*/ 1670 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 1671 { 1672 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 1673 1674 PetscFunctionBegin; 1675 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1676 if (f) { 1677 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1678 } 1679 PetscFunctionReturn(0); 1680 } 1681 1682 #undef __FUNCT__ 1683 #define __FUNCT__ "MatCreateSeqSBAIJ" 1684 /*@C 1685 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1686 compressed row) format. For good matrix assembly performance the 1687 user should preallocate the matrix storage by setting the parameter nz 1688 (or the array nnz). By setting these parameters accurately, performance 1689 during matrix assembly can be increased by more than a factor of 50. 1690 1691 Collective on MPI_Comm 1692 1693 Input Parameters: 1694 + comm - MPI communicator, set to PETSC_COMM_SELF 1695 . bs - size of block 1696 . m - number of rows, or number of columns 1697 . nz - number of block nonzeros per block row (same for all rows) 1698 - nnz - array containing the number of block nonzeros in the upper triangular plus 1699 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1700 1701 Output Parameter: 1702 . A - the symmetric matrix 1703 1704 Options Database Keys: 1705 . -mat_no_unroll - uses code that does not unroll the loops in the 1706 block calculations (much slower) 1707 . -mat_block_size - size of the blocks to use 1708 1709 Level: intermediate 1710 1711 Notes: 1712 1713 Specify the preallocated storage with either nz or nnz (not both). 1714 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1715 allocation. For additional details, see the users manual chapter on 1716 matrices. 1717 1718 If the nnz parameter is given then the nz parameter is ignored 1719 1720 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1721 @*/ 1722 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1723 { 1724 PetscErrorCode ierr; 1725 1726 PetscFunctionBegin; 1727 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1728 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 1729 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1730 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 1731 PetscFunctionReturn(0); 1732 } 1733 1734 #undef __FUNCT__ 1735 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1736 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1737 { 1738 Mat C; 1739 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1740 PetscErrorCode ierr; 1741 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1742 1743 PetscFunctionBegin; 1744 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1745 1746 *B = 0; 1747 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 1748 ierr = MatSetSizes(C,A->m,A->n,A->m,A->n);CHKERRQ(ierr); 1749 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1750 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1751 c = (Mat_SeqSBAIJ*)C->data; 1752 1753 C->preallocated = PETSC_TRUE; 1754 C->factor = A->factor; 1755 c->row = 0; 1756 c->icol = 0; 1757 c->saved_values = 0; 1758 c->keepzeroedrows = a->keepzeroedrows; 1759 C->assembled = PETSC_TRUE; 1760 1761 C->M = A->M; 1762 C->N = A->N; 1763 C->bs = A->bs; 1764 c->bs2 = a->bs2; 1765 c->mbs = a->mbs; 1766 c->nbs = a->nbs; 1767 1768 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr); 1769 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr); 1770 for (i=0; i<mbs; i++) { 1771 c->imax[i] = a->imax[i]; 1772 c->ilen[i] = a->ilen[i]; 1773 } 1774 ierr = PetscLogObjectMemory(C,2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1775 1776 /* allocate the matrix space */ 1777 ierr = PetscMalloc3(bs2*nz,MatScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 1778 c->singlemalloc = PETSC_TRUE; 1779 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1780 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)));CHKERRQ(ierr); 1781 if (mbs > 0) { 1782 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1783 if (cpvalues == MAT_COPY_VALUES) { 1784 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1785 } else { 1786 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1787 } 1788 } 1789 1790 c->sorted = a->sorted; 1791 c->roworiented = a->roworiented; 1792 c->nonew = a->nonew; 1793 1794 if (a->diag) { 1795 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 1796 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1797 for (i=0; i<mbs; i++) { 1798 c->diag[i] = a->diag[i]; 1799 } 1800 } else c->diag = 0; 1801 c->nz = a->nz; 1802 c->maxnz = a->maxnz; 1803 c->solve_work = 0; 1804 c->mult_work = 0; 1805 *B = C; 1806 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1807 PetscFunctionReturn(0); 1808 } 1809 1810 #undef __FUNCT__ 1811 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1812 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer, MatType type,Mat *A) 1813 { 1814 Mat_SeqSBAIJ *a; 1815 Mat B; 1816 PetscErrorCode ierr; 1817 int fd; 1818 PetscMPIInt size; 1819 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 1820 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1821 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 1822 PetscInt *masked,nmask,tmp,bs2,ishift; 1823 PetscScalar *aa; 1824 MPI_Comm comm = ((PetscObject)viewer)->comm; 1825 1826 PetscFunctionBegin; 1827 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1828 bs2 = bs*bs; 1829 1830 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1831 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1832 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1833 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1834 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1835 M = header[1]; N = header[2]; nz = header[3]; 1836 1837 if (header[3] < 0) { 1838 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1839 } 1840 1841 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1842 1843 /* 1844 This code adds extra rows to make sure the number of rows is 1845 divisible by the blocksize 1846 */ 1847 mbs = M/bs; 1848 extra_rows = bs - M + bs*(mbs); 1849 if (extra_rows == bs) extra_rows = 0; 1850 else mbs++; 1851 if (extra_rows) { 1852 ierr = PetscLogInfo((0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr); 1853 } 1854 1855 /* read in row lengths */ 1856 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1857 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1858 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1859 1860 /* read in column indices */ 1861 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1862 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1863 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1864 1865 /* loop over row lengths determining block row lengths */ 1866 ierr = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr); 1867 ierr = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1868 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 1869 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1870 masked = mask + mbs; 1871 rowcount = 0; nzcount = 0; 1872 for (i=0; i<mbs; i++) { 1873 nmask = 0; 1874 for (j=0; j<bs; j++) { 1875 kmax = rowlengths[rowcount]; 1876 for (k=0; k<kmax; k++) { 1877 tmp = jj[nzcount++]/bs; /* block col. index */ 1878 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1879 } 1880 rowcount++; 1881 } 1882 s_browlengths[i] += nmask; 1883 1884 /* zero out the mask elements we set */ 1885 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1886 } 1887 1888 /* create our matrix */ 1889 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 1890 ierr = MatSetSizes(B,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 1891 ierr = MatSetType(B,type);CHKERRQ(ierr); 1892 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,s_browlengths);CHKERRQ(ierr); 1893 a = (Mat_SeqSBAIJ*)B->data; 1894 1895 /* set matrix "i" values */ 1896 a->i[0] = 0; 1897 for (i=1; i<= mbs; i++) { 1898 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1899 a->ilen[i-1] = s_browlengths[i-1]; 1900 } 1901 a->nz = a->i[mbs]; 1902 1903 /* read in nonzero values */ 1904 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1905 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1906 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1907 1908 /* set "a" and "j" values into matrix */ 1909 nzcount = 0; jcount = 0; 1910 for (i=0; i<mbs; i++) { 1911 nzcountb = nzcount; 1912 nmask = 0; 1913 for (j=0; j<bs; j++) { 1914 kmax = rowlengths[i*bs+j]; 1915 for (k=0; k<kmax; k++) { 1916 tmp = jj[nzcount++]/bs; /* block col. index */ 1917 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1918 } 1919 } 1920 /* sort the masked values */ 1921 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1922 1923 /* set "j" values into matrix */ 1924 maskcount = 1; 1925 for (j=0; j<nmask; j++) { 1926 a->j[jcount++] = masked[j]; 1927 mask[masked[j]] = maskcount++; 1928 } 1929 1930 /* set "a" values into matrix */ 1931 ishift = bs2*a->i[i]; 1932 for (j=0; j<bs; j++) { 1933 kmax = rowlengths[i*bs+j]; 1934 for (k=0; k<kmax; k++) { 1935 tmp = jj[nzcountb]/bs ; /* block col. index */ 1936 if (tmp >= i){ 1937 block = mask[tmp] - 1; 1938 point = jj[nzcountb] - bs*tmp; 1939 idx = ishift + bs2*block + j + bs*point; 1940 a->a[idx] = aa[nzcountb]; 1941 } 1942 nzcountb++; 1943 } 1944 } 1945 /* zero out the mask elements we set */ 1946 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1947 } 1948 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1949 1950 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1951 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1952 ierr = PetscFree(aa);CHKERRQ(ierr); 1953 ierr = PetscFree(jj);CHKERRQ(ierr); 1954 ierr = PetscFree(mask);CHKERRQ(ierr); 1955 1956 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1957 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1958 ierr = MatView_Private(B);CHKERRQ(ierr); 1959 *A = B; 1960 PetscFunctionReturn(0); 1961 } 1962 1963 #undef __FUNCT__ 1964 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1965 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1966 { 1967 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1968 MatScalar *aa=a->a,*v,*v1; 1969 PetscScalar *x,*b,*t,sum,d; 1970 PetscErrorCode ierr; 1971 PetscInt m=a->mbs,bs=A->bs,*ai=a->i,*aj=a->j; 1972 PetscInt nz,nz1,*vj,*vj1,i; 1973 1974 PetscFunctionBegin; 1975 its = its*lits; 1976 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1977 1978 if (bs > 1) 1979 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1980 1981 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1982 if (xx != bb) { 1983 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1984 } else { 1985 b = x; 1986 } 1987 1988 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1989 1990 if (flag & SOR_ZERO_INITIAL_GUESS) { 1991 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1992 for (i=0; i<m; i++) 1993 t[i] = b[i]; 1994 1995 for (i=0; i<m; i++){ 1996 d = *(aa + ai[i]); /* diag[i] */ 1997 v = aa + ai[i] + 1; 1998 vj = aj + ai[i] + 1; 1999 nz = ai[i+1] - ai[i] - 1; 2000 x[i] = omega*t[i]/d; 2001 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 2002 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2003 } 2004 } 2005 2006 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2007 for (i=0; i<m; i++) 2008 t[i] = b[i]; 2009 2010 for (i=0; i<m-1; i++){ /* update rhs */ 2011 v = aa + ai[i] + 1; 2012 vj = aj + ai[i] + 1; 2013 nz = ai[i+1] - ai[i] - 1; 2014 while (nz--) t[*vj++] -= x[i]*(*v++); 2015 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2016 } 2017 for (i=m-1; i>=0; i--){ 2018 d = *(aa + ai[i]); 2019 v = aa + ai[i] + 1; 2020 vj = aj + ai[i] + 1; 2021 nz = ai[i+1] - ai[i] - 1; 2022 sum = t[i]; 2023 while (nz--) sum -= x[*vj++]*(*v++); 2024 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2025 x[i] = (1-omega)*x[i] + omega*sum/d; 2026 } 2027 } 2028 its--; 2029 } 2030 2031 while (its--) { 2032 /* 2033 forward sweep: 2034 for i=0,...,m-1: 2035 sum[i] = (b[i] - U(i,:)x )/d[i]; 2036 x[i] = (1-omega)x[i] + omega*sum[i]; 2037 b = b - x[i]*U^T(i,:); 2038 2039 */ 2040 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2041 for (i=0; i<m; i++) 2042 t[i] = b[i]; 2043 2044 for (i=0; i<m; i++){ 2045 d = *(aa + ai[i]); /* diag[i] */ 2046 v = aa + ai[i] + 1; v1=v; 2047 vj = aj + ai[i] + 1; vj1=vj; 2048 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2049 sum = t[i]; 2050 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2051 x[i] = (1-omega)*x[i] + omega*sum/d; 2052 while (nz--) t[*vj++] -= x[i]*(*v++); 2053 ierr = PetscLogFlops(4*nz-2);CHKERRQ(ierr); 2054 } 2055 } 2056 2057 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2058 /* 2059 backward sweep: 2060 b = b - x[i]*U^T(i,:), i=0,...,n-2 2061 for i=m-1,...,0: 2062 sum[i] = (b[i] - U(i,:)x )/d[i]; 2063 x[i] = (1-omega)x[i] + omega*sum[i]; 2064 */ 2065 for (i=0; i<m; i++) 2066 t[i] = b[i]; 2067 2068 for (i=0; i<m-1; i++){ /* update rhs */ 2069 v = aa + ai[i] + 1; 2070 vj = aj + ai[i] + 1; 2071 nz = ai[i+1] - ai[i] - 1; 2072 while (nz--) t[*vj++] -= x[i]*(*v++); 2073 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2074 } 2075 for (i=m-1; i>=0; i--){ 2076 d = *(aa + ai[i]); 2077 v = aa + ai[i] + 1; 2078 vj = aj + ai[i] + 1; 2079 nz = ai[i+1] - ai[i] - 1; 2080 sum = t[i]; 2081 while (nz--) sum -= x[*vj++]*(*v++); 2082 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2083 x[i] = (1-omega)*x[i] + omega*sum/d; 2084 } 2085 } 2086 } 2087 2088 ierr = PetscFree(t);CHKERRQ(ierr); 2089 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2090 if (bb != xx) { 2091 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2092 } 2093 PetscFunctionReturn(0); 2094 } 2095 2096 2097 2098 2099 2100 2101