1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the SBAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/inline/spops.h" 9 #include "src/mat/impls/sbaij/seq/sbaij.h" 10 11 #define CHUNKSIZE 10 12 13 /* 14 Checks for missing diagonals 15 */ 16 #undef __FUNCT__ 17 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 18 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A) 19 { 20 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 21 PetscErrorCode ierr; 22 PetscInt *diag,*jj = a->j,i; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 for (i=0; i<a->mbs; i++) { 28 if (jj[diag[i]] != i) SETERRQ1(PETSC_ERR_ARG_CORRUPT,"Matrix is missing diagonal number %D",i); 29 } 30 PetscFunctionReturn(0); 31 } 32 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 35 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A) 36 { 37 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 38 PetscErrorCode ierr; 39 PetscInt i,mbs = a->mbs; 40 41 PetscFunctionBegin; 42 if (a->diag) PetscFunctionReturn(0); 43 44 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 45 ierr = PetscLogObjectMemory(A,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 46 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 47 PetscFunctionReturn(0); 48 } 49 50 #undef __FUNCT__ 51 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 52 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 53 { 54 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 55 PetscInt n = a->mbs,i; 56 57 PetscFunctionBegin; 58 *nn = n; 59 if (!ia) PetscFunctionReturn(0); 60 61 if (oshift == 1) { 62 /* temporarily add 1 to i and j indices */ 63 PetscInt nz = a->i[n]; 64 for (i=0; i<nz; i++) a->j[i]++; 65 for (i=0; i<n+1; i++) a->i[i]++; 66 *ia = a->i; *ja = a->j; 67 } else { 68 *ia = a->i; *ja = a->j; 69 } 70 PetscFunctionReturn(0); 71 } 72 73 #undef __FUNCT__ 74 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 75 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 76 { 77 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 78 PetscInt i,n = a->mbs; 79 80 PetscFunctionBegin; 81 if (!ia) PetscFunctionReturn(0); 82 83 if (oshift == 1) { 84 PetscInt nz = a->i[n]-1; 85 for (i=0; i<nz; i++) a->j[i]--; 86 for (i=0; i<n+1; i++) a->i[i]--; 87 } 88 PetscFunctionReturn(0); 89 } 90 91 #undef __FUNCT__ 92 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 93 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A) 94 { 95 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 96 PetscErrorCode ierr; 97 98 PetscFunctionBegin; 99 PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->m,a->nz); 100 ierr = MatSeqXAIJFreeAIJ(a->singlemalloc,&a->a,&a->j,&a->i);CHKERRQ(ierr); 101 if (a->row) { 102 ierr = ISDestroy(a->row);CHKERRQ(ierr); 103 } 104 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 105 if (a->imax) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 106 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 107 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 108 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 109 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 110 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 111 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 112 113 if (a->inew){ 114 ierr = PetscFree(a->inew);CHKERRQ(ierr); 115 a->inew = 0; 116 } 117 ierr = PetscFree(a);CHKERRQ(ierr); 118 119 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 120 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 121 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 122 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 123 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr); 124 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 125 PetscFunctionReturn(0); 126 } 127 128 #undef __FUNCT__ 129 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 130 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op) 131 { 132 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 133 PetscErrorCode ierr; 134 135 PetscFunctionBegin; 136 switch (op) { 137 case MAT_ROW_ORIENTED: 138 a->roworiented = PETSC_TRUE; 139 break; 140 case MAT_COLUMN_ORIENTED: 141 a->roworiented = PETSC_FALSE; 142 break; 143 case MAT_COLUMNS_SORTED: 144 a->sorted = PETSC_TRUE; 145 break; 146 case MAT_COLUMNS_UNSORTED: 147 a->sorted = PETSC_FALSE; 148 break; 149 case MAT_KEEP_ZEROED_ROWS: 150 a->keepzeroedrows = PETSC_TRUE; 151 break; 152 case MAT_NO_NEW_NONZERO_LOCATIONS: 153 a->nonew = 1; 154 break; 155 case MAT_NEW_NONZERO_LOCATION_ERR: 156 a->nonew = -1; 157 break; 158 case MAT_NEW_NONZERO_ALLOCATION_ERR: 159 a->nonew = -2; 160 break; 161 case MAT_YES_NEW_NONZERO_LOCATIONS: 162 a->nonew = 0; 163 break; 164 case MAT_ROWS_SORTED: 165 case MAT_ROWS_UNSORTED: 166 case MAT_YES_NEW_DIAGONALS: 167 case MAT_IGNORE_OFF_PROC_ENTRIES: 168 case MAT_USE_HASH_TABLE: 169 ierr = PetscLogInfo((A,"MatSetOption_SeqSBAIJ:Option ignored\n"));CHKERRQ(ierr); 170 break; 171 case MAT_NO_NEW_DIAGONALS: 172 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 173 case MAT_NOT_SYMMETRIC: 174 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 175 case MAT_HERMITIAN: 176 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 177 case MAT_SYMMETRIC: 178 case MAT_STRUCTURALLY_SYMMETRIC: 179 case MAT_NOT_HERMITIAN: 180 case MAT_SYMMETRY_ETERNAL: 181 case MAT_NOT_SYMMETRY_ETERNAL: 182 case MAT_IGNORE_LOWER_TRIANGULAR: 183 a->ignore_ltriangular = PETSC_TRUE; 184 break; 185 case MAT_ERROR_LOWER_TRIANGULAR: 186 a->ignore_ltriangular = PETSC_FALSE; 187 break; 188 default: 189 SETERRQ(PETSC_ERR_SUP,"unknown option"); 190 } 191 PetscFunctionReturn(0); 192 } 193 194 #undef __FUNCT__ 195 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 196 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v) 197 { 198 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 199 PetscErrorCode ierr; 200 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2; 201 MatScalar *aa,*aa_i; 202 PetscScalar *v_i; 203 204 PetscFunctionBegin; 205 bs = A->bs; 206 ai = a->i; 207 aj = a->j; 208 aa = a->a; 209 bs2 = a->bs2; 210 211 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row); 212 213 bn = row/bs; /* Block number */ 214 bp = row % bs; /* Block position */ 215 M = ai[bn+1] - ai[bn]; 216 *ncols = bs*M; 217 218 if (v) { 219 *v = 0; 220 if (*ncols) { 221 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 222 for (i=0; i<M; i++) { /* for each block in the block row */ 223 v_i = *v + i*bs; 224 aa_i = aa + bs2*(ai[bn] + i); 225 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 226 } 227 } 228 } 229 230 if (cols) { 231 *cols = 0; 232 if (*ncols) { 233 ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr); 234 for (i=0; i<M; i++) { /* for each block in the block row */ 235 cols_i = *cols + i*bs; 236 itmp = bs*aj[ai[bn] + i]; 237 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 238 } 239 } 240 } 241 242 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 243 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 244 #ifdef column_search 245 v_i = *v + M*bs; 246 cols_i = *cols + M*bs; 247 for (i=0; i<bn; i++){ /* for each block row */ 248 M = ai[i+1] - ai[i]; 249 for (j=0; j<M; j++){ 250 itmp = aj[ai[i] + j]; /* block column value */ 251 if (itmp == bn){ 252 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 253 for (k=0; k<bs; k++) { 254 *cols_i++ = i*bs+k; 255 *v_i++ = aa_i[k]; 256 } 257 *ncols += bs; 258 break; 259 } 260 } 261 } 262 #endif 263 PetscFunctionReturn(0); 264 } 265 266 #undef __FUNCT__ 267 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 268 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 269 { 270 PetscErrorCode ierr; 271 272 PetscFunctionBegin; 273 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 274 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 275 PetscFunctionReturn(0); 276 } 277 278 #undef __FUNCT__ 279 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 280 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B) 281 { 282 PetscErrorCode ierr; 283 PetscFunctionBegin; 284 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 285 PetscFunctionReturn(0); 286 } 287 288 #undef __FUNCT__ 289 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 290 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 291 { 292 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 293 PetscErrorCode ierr; 294 PetscInt i,j,bs = A->bs,k,l,bs2=a->bs2; 295 const char *name; 296 PetscViewerFormat format; 297 298 PetscFunctionBegin; 299 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 300 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 301 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 302 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 303 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 304 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 305 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 306 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 307 for (i=0; i<a->mbs; i++) { 308 for (j=0; j<bs; j++) { 309 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 310 for (k=a->i[i]; k<a->i[i+1]; k++) { 311 for (l=0; l<bs; l++) { 312 #if defined(PETSC_USE_COMPLEX) 313 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 314 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 315 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 316 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 317 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 318 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 319 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 320 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 321 } 322 #else 323 if (a->a[bs2*k + l*bs + j] != 0.0) { 324 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 325 } 326 #endif 327 } 328 } 329 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 330 } 331 } 332 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 333 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 334 PetscFunctionReturn(0); 335 } else { 336 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 337 for (i=0; i<a->mbs; i++) { 338 for (j=0; j<bs; j++) { 339 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 340 for (k=a->i[i]; k<a->i[i+1]; k++) { 341 for (l=0; l<bs; l++) { 342 #if defined(PETSC_USE_COMPLEX) 343 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 344 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 345 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 346 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 347 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 348 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 349 } else { 350 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 351 } 352 #else 353 ierr = PetscViewerASCIIPrintf(viewer," %D %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 354 #endif 355 } 356 } 357 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 358 } 359 } 360 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 361 } 362 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 363 PetscFunctionReturn(0); 364 } 365 366 #undef __FUNCT__ 367 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 368 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 369 { 370 Mat A = (Mat) Aa; 371 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 372 PetscErrorCode ierr; 373 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2; 374 PetscMPIInt rank; 375 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 376 MatScalar *aa; 377 MPI_Comm comm; 378 PetscViewer viewer; 379 380 PetscFunctionBegin; 381 /* 382 This is nasty. If this is called from an originally parallel matrix 383 then all processes call this,but only the first has the matrix so the 384 rest should return immediately. 385 */ 386 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 387 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 388 if (rank) PetscFunctionReturn(0); 389 390 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 391 392 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 393 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 394 395 /* loop over matrix elements drawing boxes */ 396 color = PETSC_DRAW_BLUE; 397 for (i=0,row=0; i<mbs; i++,row+=bs) { 398 for (j=a->i[i]; j<a->i[i+1]; j++) { 399 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 400 x_l = a->j[j]*bs; x_r = x_l + 1.0; 401 aa = a->a + j*bs2; 402 for (k=0; k<bs; k++) { 403 for (l=0; l<bs; l++) { 404 if (PetscRealPart(*aa++) >= 0.) continue; 405 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 406 } 407 } 408 } 409 } 410 color = PETSC_DRAW_CYAN; 411 for (i=0,row=0; i<mbs; i++,row+=bs) { 412 for (j=a->i[i]; j<a->i[i+1]; j++) { 413 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 414 x_l = a->j[j]*bs; x_r = x_l + 1.0; 415 aa = a->a + j*bs2; 416 for (k=0; k<bs; k++) { 417 for (l=0; l<bs; l++) { 418 if (PetscRealPart(*aa++) != 0.) continue; 419 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 420 } 421 } 422 } 423 } 424 425 color = PETSC_DRAW_RED; 426 for (i=0,row=0; i<mbs; i++,row+=bs) { 427 for (j=a->i[i]; j<a->i[i+1]; j++) { 428 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 429 x_l = a->j[j]*bs; x_r = x_l + 1.0; 430 aa = a->a + j*bs2; 431 for (k=0; k<bs; k++) { 432 for (l=0; l<bs; l++) { 433 if (PetscRealPart(*aa++) <= 0.) continue; 434 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 435 } 436 } 437 } 438 } 439 PetscFunctionReturn(0); 440 } 441 442 #undef __FUNCT__ 443 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 444 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 445 { 446 PetscErrorCode ierr; 447 PetscReal xl,yl,xr,yr,w,h; 448 PetscDraw draw; 449 PetscTruth isnull; 450 451 PetscFunctionBegin; 452 453 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 454 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 455 456 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 457 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 458 xr += w; yr += h; xl = -w; yl = -h; 459 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 460 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 461 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 462 PetscFunctionReturn(0); 463 } 464 465 #undef __FUNCT__ 466 #define __FUNCT__ "MatView_SeqSBAIJ" 467 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 468 { 469 PetscErrorCode ierr; 470 PetscTruth iascii,isdraw; 471 472 PetscFunctionBegin; 473 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 474 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 475 if (iascii){ 476 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 477 } else if (isdraw) { 478 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 479 } else { 480 Mat B; 481 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 482 ierr = MatView(B,viewer);CHKERRQ(ierr); 483 ierr = MatDestroy(B);CHKERRQ(ierr); 484 } 485 PetscFunctionReturn(0); 486 } 487 488 489 #undef __FUNCT__ 490 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 491 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 492 { 493 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 494 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 495 PetscInt *ai = a->i,*ailen = a->ilen; 496 PetscInt brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2; 497 MatScalar *ap,*aa = a->a,zero = 0.0; 498 499 PetscFunctionBegin; 500 for (k=0; k<m; k++) { /* loop over rows */ 501 row = im[k]; brow = row/bs; 502 if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row); 503 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 504 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 505 nrow = ailen[brow]; 506 for (l=0; l<n; l++) { /* loop over columns */ 507 if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]); 508 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1); 509 col = in[l] ; 510 bcol = col/bs; 511 cidx = col%bs; 512 ridx = row%bs; 513 high = nrow; 514 low = 0; /* assume unsorted */ 515 while (high-low > 5) { 516 t = (low+high)/2; 517 if (rp[t] > bcol) high = t; 518 else low = t; 519 } 520 for (i=low; i<high; i++) { 521 if (rp[i] > bcol) break; 522 if (rp[i] == bcol) { 523 *v++ = ap[bs2*i+bs*cidx+ridx]; 524 goto finished; 525 } 526 } 527 *v++ = zero; 528 finished:; 529 } 530 } 531 PetscFunctionReturn(0); 532 } 533 534 535 #undef __FUNCT__ 536 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 537 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 538 { 539 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 540 PetscErrorCode ierr; 541 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 542 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 543 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval; 544 PetscTruth roworiented=a->roworiented; 545 const MatScalar *value = v; 546 MatScalar *ap,*aa = a->a,*bap; 547 548 PetscFunctionBegin; 549 if (roworiented) { 550 stepval = (n-1)*bs; 551 } else { 552 stepval = (m-1)*bs; 553 } 554 for (k=0; k<m; k++) { /* loop over added rows */ 555 row = im[k]; 556 if (row < 0) continue; 557 #if defined(PETSC_USE_DEBUG) 558 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 559 #endif 560 rp = aj + ai[row]; 561 ap = aa + bs2*ai[row]; 562 rmax = imax[row]; 563 nrow = ailen[row]; 564 low = 0; 565 high = nrow; 566 for (l=0; l<n; l++) { /* loop over added columns */ 567 if (in[l] < 0) continue; 568 col = in[l]; 569 #if defined(PETSC_USE_DEBUG) 570 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1); 571 #endif 572 if (col < row) continue; /* ignore lower triangular block */ 573 if (roworiented) { 574 value = v + k*(stepval+bs)*bs + l*bs; 575 } else { 576 value = v + l*(stepval+bs)*bs + k*bs; 577 } 578 if (col <= lastcol) low = 0; else high = nrow; 579 lastcol = col; 580 while (high-low > 7) { 581 t = (low+high)/2; 582 if (rp[t] > col) high = t; 583 else low = t; 584 } 585 for (i=low; i<high; i++) { 586 if (rp[i] > col) break; 587 if (rp[i] == col) { 588 bap = ap + bs2*i; 589 if (roworiented) { 590 if (is == ADD_VALUES) { 591 for (ii=0; ii<bs; ii++,value+=stepval) { 592 for (jj=ii; jj<bs2; jj+=bs) { 593 bap[jj] += *value++; 594 } 595 } 596 } else { 597 for (ii=0; ii<bs; ii++,value+=stepval) { 598 for (jj=ii; jj<bs2; jj+=bs) { 599 bap[jj] = *value++; 600 } 601 } 602 } 603 } else { 604 if (is == ADD_VALUES) { 605 for (ii=0; ii<bs; ii++,value+=stepval) { 606 for (jj=0; jj<bs; jj++) { 607 *bap++ += *value++; 608 } 609 } 610 } else { 611 for (ii=0; ii<bs; ii++,value+=stepval) { 612 for (jj=0; jj<bs; jj++) { 613 *bap++ = *value++; 614 } 615 } 616 } 617 } 618 goto noinsert2; 619 } 620 } 621 if (nonew == 1) goto noinsert2; 622 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 623 MatSeqXAIJReallocateAIJ(a,bs2,nrow,row,col,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew); 624 N = nrow++ - 1; 625 /* shift up all the later entries in this row */ 626 for (ii=N; ii>=i; ii--) { 627 rp[ii+1] = rp[ii]; 628 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 629 } 630 if (N >= i) { 631 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 632 } 633 rp[i] = col; 634 bap = ap + bs2*i; 635 if (roworiented) { 636 for (ii=0; ii<bs; ii++,value+=stepval) { 637 for (jj=ii; jj<bs2; jj+=bs) { 638 bap[jj] = *value++; 639 } 640 } 641 } else { 642 for (ii=0; ii<bs; ii++,value+=stepval) { 643 for (jj=0; jj<bs; jj++) { 644 *bap++ = *value++; 645 } 646 } 647 } 648 noinsert2:; 649 low = i; 650 } 651 ailen[row] = nrow; 652 } 653 PetscFunctionReturn(0); 654 } 655 656 #undef __FUNCT__ 657 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 658 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 659 { 660 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 661 PetscErrorCode ierr; 662 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 663 PetscInt m = A->m,*ip,N,*ailen = a->ilen; 664 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 665 MatScalar *aa = a->a,*ap; 666 667 PetscFunctionBegin; 668 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 669 670 if (m) rmax = ailen[0]; 671 for (i=1; i<mbs; i++) { 672 /* move each row back by the amount of empty slots (fshift) before it*/ 673 fshift += imax[i-1] - ailen[i-1]; 674 rmax = PetscMax(rmax,ailen[i]); 675 if (fshift) { 676 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 677 N = ailen[i]; 678 for (j=0; j<N; j++) { 679 ip[j-fshift] = ip[j]; 680 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 681 } 682 } 683 ai[i] = ai[i-1] + ailen[i-1]; 684 } 685 if (mbs) { 686 fshift += imax[mbs-1] - ailen[mbs-1]; 687 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 688 } 689 /* reset ilen and imax for each row */ 690 for (i=0; i<mbs; i++) { 691 ailen[i] = imax[i] = ai[i+1] - ai[i]; 692 } 693 a->nz = ai[mbs]; 694 695 /* diagonals may have moved, reset it */ 696 if (a->diag) { 697 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 698 } 699 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n", 700 m,A->m,A->bs,fshift*bs2,a->nz*bs2));CHKERRQ(ierr); 701 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs));CHKERRQ(ierr); 702 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %D\n",rmax));CHKERRQ(ierr); 703 a->reallocs = 0; 704 A->info.nz_unneeded = (PetscReal)fshift*bs2; 705 PetscFunctionReturn(0); 706 } 707 708 /* 709 This function returns an array of flags which indicate the locations of contiguous 710 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 711 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 712 Assume: sizes should be long enough to hold all the values. 713 */ 714 #undef __FUNCT__ 715 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 716 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 717 { 718 PetscInt i,j,k,row; 719 PetscTruth flg; 720 721 PetscFunctionBegin; 722 for (i=0,j=0; i<n; j++) { 723 row = idx[i]; 724 if (row%bs!=0) { /* Not the begining of a block */ 725 sizes[j] = 1; 726 i++; 727 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 728 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 729 i++; 730 } else { /* Begining of the block, so check if the complete block exists */ 731 flg = PETSC_TRUE; 732 for (k=1; k<bs; k++) { 733 if (row+k != idx[i+k]) { /* break in the block */ 734 flg = PETSC_FALSE; 735 break; 736 } 737 } 738 if (flg) { /* No break in the bs */ 739 sizes[j] = bs; 740 i+= bs; 741 } else { 742 sizes[j] = 1; 743 i++; 744 } 745 } 746 } 747 *bs_max = j; 748 PetscFunctionReturn(0); 749 } 750 751 752 /* Only add/insert a(i,j) with i<=j (blocks). 753 Any a(i,j) with i>j input by user is ingored. 754 */ 755 756 #undef __FUNCT__ 757 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 758 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 759 { 760 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 761 PetscErrorCode ierr; 762 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 763 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 764 PetscInt *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol; 765 PetscInt ridx,cidx,bs2=a->bs2; 766 MatScalar *ap,value,*aa=a->a,*bap; 767 768 PetscFunctionBegin; 769 for (k=0; k<m; k++) { /* loop over added rows */ 770 row = im[k]; /* row number */ 771 brow = row/bs; /* block row number */ 772 if (row < 0) continue; 773 #if defined(PETSC_USE_DEBUG) 774 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 775 #endif 776 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 777 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 778 rmax = imax[brow]; /* maximum space allocated for this row */ 779 nrow = ailen[brow]; /* actual length of this row */ 780 low = 0; 781 782 for (l=0; l<n; l++) { /* loop over added columns */ 783 if (in[l] < 0) continue; 784 #if defined(PETSC_USE_DEBUG) 785 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->m-1); 786 #endif 787 col = in[l]; 788 bcol = col/bs; /* block col number */ 789 790 if (brow > bcol) { 791 if (a->ignore_ltriangular){ 792 continue; /* ignore lower triangular values */ 793 } else { 794 SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR)"); 795 } 796 } 797 798 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 799 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 800 /* element value a(k,l) */ 801 if (roworiented) { 802 value = v[l + k*n]; 803 } else { 804 value = v[k + l*m]; 805 } 806 807 /* move pointer bap to a(k,l) quickly and add/insert value */ 808 if (col <= lastcol) low = 0; high = nrow; 809 lastcol = col; 810 while (high-low > 7) { 811 t = (low+high)/2; 812 if (rp[t] > bcol) high = t; 813 else low = t; 814 } 815 for (i=low; i<high; i++) { 816 if (rp[i] > bcol) break; 817 if (rp[i] == bcol) { 818 bap = ap + bs2*i + bs*cidx + ridx; 819 if (is == ADD_VALUES) *bap += value; 820 else *bap = value; 821 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 822 if (brow == bcol && ridx < cidx){ 823 bap = ap + bs2*i + bs*ridx + cidx; 824 if (is == ADD_VALUES) *bap += value; 825 else *bap = value; 826 } 827 goto noinsert1; 828 } 829 } 830 831 if (nonew == 1) goto noinsert1; 832 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 833 MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew); 834 835 N = nrow++ - 1; 836 /* shift up all the later entries in this row */ 837 for (ii=N; ii>=i; ii--) { 838 rp[ii+1] = rp[ii]; 839 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 840 } 841 if (N>=i) { 842 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 843 } 844 rp[i] = bcol; 845 ap[bs2*i + bs*cidx + ridx] = value; 846 noinsert1:; 847 low = i; 848 } 849 } /* end of loop over added columns */ 850 ailen[brow] = nrow; 851 } /* end of loop over added rows */ 852 PetscFunctionReturn(0); 853 } 854 855 EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*); 856 EXTERN PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*); 857 EXTERN PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat,PetscInt,IS[],PetscInt); 858 EXTERN PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,PetscInt,MatReuse,Mat*); 859 EXTERN PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat*[]); 860 EXTERN PetscErrorCode MatScale_SeqSBAIJ(Mat,PetscScalar); 861 EXTERN PetscErrorCode MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 862 EXTERN PetscErrorCode MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 863 EXTERN PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat,Vec); 864 EXTERN PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 865 EXTERN PetscErrorCode MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 866 EXTERN PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat); 867 EXTERN PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat,Vec); 868 869 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 870 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 871 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 872 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 873 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 874 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 875 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 876 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 877 878 EXTERN PetscErrorCode MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs); 879 880 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 881 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 882 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 883 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 884 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 885 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 886 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 887 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 888 889 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,MatFactorInfo*,Mat*); 890 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,MatFactorInfo*,Mat*); 891 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,MatFactorInfo*,Mat*); 892 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,MatFactorInfo*,Mat*); 893 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,MatFactorInfo*,Mat*); 894 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,MatFactorInfo*,Mat*); 895 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,MatFactorInfo*,Mat*); 896 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,MatFactorInfo*,Mat*); 897 EXTERN PetscErrorCode MatGetInertia_SeqSBAIJ(Mat,PetscInt*,PetscInt*,PetscInt*); 898 899 EXTERN PetscErrorCode MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 900 EXTERN PetscErrorCode MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 901 EXTERN PetscErrorCode MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 902 EXTERN PetscErrorCode MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 903 EXTERN PetscErrorCode MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 904 EXTERN PetscErrorCode MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 905 EXTERN PetscErrorCode MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 906 EXTERN PetscErrorCode MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 907 908 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 909 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 910 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 911 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 912 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 913 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 914 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 915 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 916 917 #ifdef HAVE_MatICCFactor 918 /* modified from MatILUFactor_SeqSBAIJ, needs further work! */ 919 #undef __FUNCT__ 920 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 921 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 922 { 923 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 924 Mat outA; 925 PetscErrorCode ierr; 926 PetscTruth row_identity,col_identity; 927 928 PetscFunctionBegin; 929 outA = inA; 930 inA->factor = FACTOR_CHOLESKY; 931 932 if (!a->diag) { 933 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 934 } 935 /* 936 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 937 for ILU(0) factorization with natural ordering 938 */ 939 switch (a->bs) { 940 case 1: 941 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 942 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 943 inA->ops->solves = MatSolves_SeqSBAIJ_1; 944 ierr = PetscLoginfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"));CHKERRQ(ierr); 945 case 2: 946 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 947 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 948 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 949 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"));CHKERRQ(ierr); 950 break; 951 case 3: 952 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 953 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 954 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 955 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"));CHKERRQ(ierr); 956 break; 957 case 4: 958 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 959 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 960 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 961 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"));CHKERRQ(ierr); 962 break; 963 case 5: 964 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 965 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 966 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 967 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"));CHKERRQ(ierr); 968 break; 969 case 6: 970 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 971 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 972 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 973 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"));CHKERRQ(ierr); 974 break; 975 case 7: 976 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 977 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 978 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 979 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"));CHKERRQ(ierr); 980 break; 981 default: 982 a->row = row; 983 a->icol = col; 984 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 985 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 986 987 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 988 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 989 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 990 991 if (!a->solve_work) { 992 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 993 ierr = PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 994 } 995 } 996 997 ierr = MatCholeskyFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 998 PetscFunctionReturn(0); 999 } 1000 #endif 1001 1002 #undef __FUNCT__ 1003 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1004 PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat A) 1005 { 1006 static PetscTruth called = PETSC_FALSE; 1007 MPI_Comm comm = A->comm; 1008 PetscErrorCode ierr; 1009 1010 PetscFunctionBegin; 1011 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1012 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1013 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1014 ierr = (*PetscHelpPrintf)(comm," -mat_ignore_ltriangular: Ignore lower triangular values set by user\n");CHKERRQ(ierr); 1015 PetscFunctionReturn(0); 1016 } 1017 1018 EXTERN_C_BEGIN 1019 #undef __FUNCT__ 1020 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1021 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices) 1022 { 1023 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1024 PetscInt i,nz,n; 1025 1026 PetscFunctionBegin; 1027 nz = baij->maxnz; 1028 n = mat->n; 1029 for (i=0; i<nz; i++) { 1030 baij->j[i] = indices[i]; 1031 } 1032 baij->nz = nz; 1033 for (i=0; i<n; i++) { 1034 baij->ilen[i] = baij->imax[i]; 1035 } 1036 PetscFunctionReturn(0); 1037 } 1038 EXTERN_C_END 1039 1040 #undef __FUNCT__ 1041 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1042 /*@ 1043 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1044 in the matrix. 1045 1046 Input Parameters: 1047 + mat - the SeqSBAIJ matrix 1048 - indices - the column indices 1049 1050 Level: advanced 1051 1052 Notes: 1053 This can be called if you have precomputed the nonzero structure of the 1054 matrix and want to provide it to the matrix object to improve the performance 1055 of the MatSetValues() operation. 1056 1057 You MUST have set the correct numbers of nonzeros per row in the call to 1058 MatCreateSeqSBAIJ(), and the columns indices MUST be sorted. 1059 1060 MUST be called before any calls to MatSetValues() 1061 1062 .seealso: MatCreateSeqSBAIJ 1063 @*/ 1064 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1065 { 1066 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1067 1068 PetscFunctionBegin; 1069 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1070 PetscValidPointer(indices,2); 1071 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1072 if (f) { 1073 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1074 } else { 1075 SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices"); 1076 } 1077 PetscFunctionReturn(0); 1078 } 1079 1080 #undef __FUNCT__ 1081 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1082 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A) 1083 { 1084 PetscErrorCode ierr; 1085 1086 PetscFunctionBegin; 1087 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1088 PetscFunctionReturn(0); 1089 } 1090 1091 #undef __FUNCT__ 1092 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1093 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1094 { 1095 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1096 PetscFunctionBegin; 1097 *array = a->a; 1098 PetscFunctionReturn(0); 1099 } 1100 1101 #undef __FUNCT__ 1102 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1103 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1104 { 1105 PetscFunctionBegin; 1106 PetscFunctionReturn(0); 1107 } 1108 1109 #include "petscblaslapack.h" 1110 #undef __FUNCT__ 1111 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1112 PetscErrorCode MatAXPY_SeqSBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1113 { 1114 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1115 PetscErrorCode ierr; 1116 PetscInt i,bs=Y->bs,bs2,j; 1117 PetscBLASInt bnz = (PetscBLASInt)x->nz,one = 1; 1118 1119 PetscFunctionBegin; 1120 if (str == SAME_NONZERO_PATTERN) { 1121 PetscScalar alpha = a; 1122 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1123 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1124 if (y->xtoy && y->XtoY != X) { 1125 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1126 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1127 } 1128 if (!y->xtoy) { /* get xtoy */ 1129 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1130 y->XtoY = X; 1131 } 1132 bs2 = bs*bs; 1133 for (i=0; i<x->nz; i++) { 1134 j = 0; 1135 while (j < bs2){ 1136 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 1137 j++; 1138 } 1139 } 1140 ierr = PetscLogInfo((0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr); 1141 } else { 1142 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1143 } 1144 PetscFunctionReturn(0); 1145 } 1146 1147 #undef __FUNCT__ 1148 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1149 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1150 { 1151 PetscFunctionBegin; 1152 *flg = PETSC_TRUE; 1153 PetscFunctionReturn(0); 1154 } 1155 1156 #undef __FUNCT__ 1157 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1158 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1159 { 1160 PetscFunctionBegin; 1161 *flg = PETSC_TRUE; 1162 PetscFunctionReturn(0); 1163 } 1164 1165 #undef __FUNCT__ 1166 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1167 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg) 1168 { 1169 PetscFunctionBegin; 1170 *flg = PETSC_FALSE; 1171 PetscFunctionReturn(0); 1172 } 1173 1174 #undef __FUNCT__ 1175 #define __FUNCT__ "MatRealPart_SeqSBAIJ" 1176 PetscErrorCode MatRealPart_SeqSBAIJ(Mat A) 1177 { 1178 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1179 PetscInt i,nz = a->bs2*a->i[a->mbs]; 1180 PetscScalar *aa = a->a; 1181 1182 PetscFunctionBegin; 1183 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 1184 PetscFunctionReturn(0); 1185 } 1186 1187 #undef __FUNCT__ 1188 #define __FUNCT__ "MatImaginaryPart_SeqSBAIJ" 1189 PetscErrorCode MatImaginaryPart_SeqSBAIJ(Mat A) 1190 { 1191 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1192 PetscInt i,nz = a->bs2*a->i[a->mbs]; 1193 PetscScalar *aa = a->a; 1194 1195 PetscFunctionBegin; 1196 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 1197 PetscFunctionReturn(0); 1198 } 1199 1200 /* -------------------------------------------------------------------*/ 1201 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1202 MatGetRow_SeqSBAIJ, 1203 MatRestoreRow_SeqSBAIJ, 1204 MatMult_SeqSBAIJ_N, 1205 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1206 MatMult_SeqSBAIJ_N, /* transpose versions are same as non-transpose versions */ 1207 MatMultAdd_SeqSBAIJ_N, 1208 MatSolve_SeqSBAIJ_N, 1209 0, 1210 0, 1211 /*10*/ 0, 1212 0, 1213 MatCholeskyFactor_SeqSBAIJ, 1214 MatRelax_SeqSBAIJ, 1215 MatTranspose_SeqSBAIJ, 1216 /*15*/ MatGetInfo_SeqSBAIJ, 1217 MatEqual_SeqSBAIJ, 1218 MatGetDiagonal_SeqSBAIJ, 1219 MatDiagonalScale_SeqSBAIJ, 1220 MatNorm_SeqSBAIJ, 1221 /*20*/ 0, 1222 MatAssemblyEnd_SeqSBAIJ, 1223 0, 1224 MatSetOption_SeqSBAIJ, 1225 MatZeroEntries_SeqSBAIJ, 1226 /*25*/ 0, 1227 0, 1228 0, 1229 MatCholeskyFactorSymbolic_SeqSBAIJ, 1230 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1231 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1232 0, 1233 MatICCFactorSymbolic_SeqSBAIJ, 1234 MatGetArray_SeqSBAIJ, 1235 MatRestoreArray_SeqSBAIJ, 1236 /*35*/ MatDuplicate_SeqSBAIJ, 1237 0, 1238 0, 1239 0, 1240 0, 1241 /*40*/ MatAXPY_SeqSBAIJ, 1242 MatGetSubMatrices_SeqSBAIJ, 1243 MatIncreaseOverlap_SeqSBAIJ, 1244 MatGetValues_SeqSBAIJ, 1245 0, 1246 /*45*/ MatPrintHelp_SeqSBAIJ, 1247 MatScale_SeqSBAIJ, 1248 0, 1249 0, 1250 0, 1251 /*50*/ 0, 1252 MatGetRowIJ_SeqSBAIJ, 1253 MatRestoreRowIJ_SeqSBAIJ, 1254 0, 1255 0, 1256 /*55*/ 0, 1257 0, 1258 0, 1259 0, 1260 MatSetValuesBlocked_SeqSBAIJ, 1261 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1262 0, 1263 0, 1264 MatGetPetscMaps_Petsc, 1265 0, 1266 /*65*/ 0, 1267 0, 1268 0, 1269 0, 1270 0, 1271 /*70*/ MatGetRowMax_SeqSBAIJ, 1272 0, 1273 0, 1274 0, 1275 0, 1276 /*75*/ 0, 1277 0, 1278 0, 1279 0, 1280 0, 1281 /*80*/ 0, 1282 0, 1283 0, 1284 #if !defined(PETSC_USE_COMPLEX) 1285 MatGetInertia_SeqSBAIJ, 1286 #else 1287 0, 1288 #endif 1289 MatLoad_SeqSBAIJ, 1290 /*85*/ MatIsSymmetric_SeqSBAIJ, 1291 MatIsHermitian_SeqSBAIJ, 1292 MatIsStructurallySymmetric_SeqSBAIJ, 1293 0, 1294 0, 1295 /*90*/ 0, 1296 0, 1297 0, 1298 0, 1299 0, 1300 /*95*/ 0, 1301 0, 1302 0, 1303 0, 1304 0, 1305 /*100*/0, 1306 0, 1307 0, 1308 0, 1309 0, 1310 /*105*/0, 1311 MatRealPart_SeqSBAIJ, 1312 MatImaginaryPart_SeqSBAIJ 1313 }; 1314 1315 EXTERN_C_BEGIN 1316 #undef __FUNCT__ 1317 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1318 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqSBAIJ(Mat mat) 1319 { 1320 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1321 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 1322 PetscErrorCode ierr; 1323 1324 PetscFunctionBegin; 1325 if (aij->nonew != 1) { 1326 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1327 } 1328 1329 /* allocate space for values if not already there */ 1330 if (!aij->saved_values) { 1331 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1332 } 1333 1334 /* copy values over */ 1335 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1336 PetscFunctionReturn(0); 1337 } 1338 EXTERN_C_END 1339 1340 EXTERN_C_BEGIN 1341 #undef __FUNCT__ 1342 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1343 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqSBAIJ(Mat mat) 1344 { 1345 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1346 PetscErrorCode ierr; 1347 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 1348 1349 PetscFunctionBegin; 1350 if (aij->nonew != 1) { 1351 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1352 } 1353 if (!aij->saved_values) { 1354 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1355 } 1356 1357 /* copy values over */ 1358 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1359 PetscFunctionReturn(0); 1360 } 1361 EXTERN_C_END 1362 1363 EXTERN_C_BEGIN 1364 #undef __FUNCT__ 1365 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1366 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 1367 { 1368 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1369 PetscErrorCode ierr; 1370 PetscInt i,mbs,bs2; 1371 PetscTruth skipallocation = PETSC_FALSE,flg; 1372 1373 PetscFunctionBegin; 1374 B->preallocated = PETSC_TRUE; 1375 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1376 mbs = B->m/bs; 1377 bs2 = bs*bs; 1378 1379 if (mbs*bs != B->m) { 1380 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1381 } 1382 1383 if (nz == MAT_SKIP_ALLOCATION) { 1384 skipallocation = PETSC_TRUE; 1385 nz = 0; 1386 } 1387 1388 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1389 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 1390 if (nnz) { 1391 for (i=0; i<mbs; i++) { 1392 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 1393 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs); 1394 } 1395 } 1396 1397 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1398 if (!flg) { 1399 switch (bs) { 1400 case 1: 1401 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1402 B->ops->solve = MatSolve_SeqSBAIJ_1; 1403 B->ops->solves = MatSolves_SeqSBAIJ_1; 1404 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1405 B->ops->mult = MatMult_SeqSBAIJ_1; 1406 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1407 B->ops->multtranspose = MatMult_SeqSBAIJ_1; 1408 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_1; 1409 break; 1410 case 2: 1411 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1412 B->ops->solve = MatSolve_SeqSBAIJ_2; 1413 B->ops->solvetranspose = MatSolve_SeqSBAIJ_2; 1414 B->ops->mult = MatMult_SeqSBAIJ_2; 1415 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1416 B->ops->multtranspose = MatMult_SeqSBAIJ_2; 1417 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_2; 1418 break; 1419 case 3: 1420 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1421 B->ops->solve = MatSolve_SeqSBAIJ_3; 1422 B->ops->solvetranspose = MatSolve_SeqSBAIJ_3; 1423 B->ops->mult = MatMult_SeqSBAIJ_3; 1424 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1425 B->ops->multtranspose = MatMult_SeqSBAIJ_3; 1426 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_3; 1427 break; 1428 case 4: 1429 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1430 B->ops->solve = MatSolve_SeqSBAIJ_4; 1431 B->ops->solvetranspose = MatSolve_SeqSBAIJ_4; 1432 B->ops->mult = MatMult_SeqSBAIJ_4; 1433 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1434 B->ops->multtranspose = MatMult_SeqSBAIJ_4; 1435 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_4; 1436 break; 1437 case 5: 1438 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1439 B->ops->solve = MatSolve_SeqSBAIJ_5; 1440 B->ops->solvetranspose = MatSolve_SeqSBAIJ_5; 1441 B->ops->mult = MatMult_SeqSBAIJ_5; 1442 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1443 B->ops->multtranspose = MatMult_SeqSBAIJ_5; 1444 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_5; 1445 break; 1446 case 6: 1447 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1448 B->ops->solve = MatSolve_SeqSBAIJ_6; 1449 B->ops->solvetranspose = MatSolve_SeqSBAIJ_6; 1450 B->ops->mult = MatMult_SeqSBAIJ_6; 1451 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1452 B->ops->multtranspose = MatMult_SeqSBAIJ_6; 1453 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_6; 1454 break; 1455 case 7: 1456 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1457 B->ops->solve = MatSolve_SeqSBAIJ_7; 1458 B->ops->solvetranspose = MatSolve_SeqSBAIJ_7; 1459 B->ops->mult = MatMult_SeqSBAIJ_7; 1460 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1461 B->ops->multtranspose = MatMult_SeqSBAIJ_7; 1462 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_7; 1463 break; 1464 } 1465 } 1466 1467 b->mbs = mbs; 1468 b->nbs = mbs; 1469 if (!skipallocation) { 1470 /* b->ilen will count nonzeros in each block row so far. */ 1471 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 1472 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1473 if (!nnz) { 1474 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1475 else if (nz <= 0) nz = 1; 1476 for (i=0; i<mbs; i++) { 1477 b->imax[i] = nz; 1478 } 1479 nz = nz*mbs; /* total nz */ 1480 } else { 1481 nz = 0; 1482 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1483 } 1484 /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1485 1486 /* allocate the matrix space */ 1487 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->m+1,PetscInt,&b->i);CHKERRQ(ierr); 1488 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1489 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1490 b->singlemalloc = PETSC_TRUE; 1491 1492 /* pointer to beginning of each row */ 1493 b->i[0] = 0; 1494 for (i=1; i<mbs+1; i++) { 1495 b->i[i] = b->i[i-1] + b->imax[i-1]; 1496 } 1497 } 1498 1499 B->bs = bs; 1500 b->bs2 = bs2; 1501 b->nz = 0; 1502 b->maxnz = nz*bs2; 1503 1504 b->inew = 0; 1505 b->jnew = 0; 1506 b->anew = 0; 1507 b->a2anew = 0; 1508 b->permute = PETSC_FALSE; 1509 PetscFunctionReturn(0); 1510 } 1511 EXTERN_C_END 1512 1513 EXTERN_C_BEGIN 1514 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 1515 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqBAIJ(Mat, MatType,MatReuse,Mat*); 1516 EXTERN_C_END 1517 1518 /*MC 1519 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1520 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1521 1522 Options Database Keys: 1523 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1524 1525 Level: beginner 1526 1527 .seealso: MatCreateSeqSBAIJ 1528 M*/ 1529 1530 EXTERN_C_BEGIN 1531 #undef __FUNCT__ 1532 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1533 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqSBAIJ(Mat B) 1534 { 1535 Mat_SeqSBAIJ *b; 1536 PetscErrorCode ierr; 1537 PetscMPIInt size; 1538 PetscTruth flg; 1539 1540 PetscFunctionBegin; 1541 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1542 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1543 B->m = B->M = PetscMax(B->m,B->M); 1544 B->n = B->N = PetscMax(B->n,B->N); 1545 1546 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1547 B->data = (void*)b; 1548 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1549 B->ops->destroy = MatDestroy_SeqSBAIJ; 1550 B->ops->view = MatView_SeqSBAIJ; 1551 B->factor = 0; 1552 B->mapping = 0; 1553 b->row = 0; 1554 b->icol = 0; 1555 b->reallocs = 0; 1556 b->saved_values = 0; 1557 1558 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1559 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1560 1561 b->sorted = PETSC_FALSE; 1562 b->roworiented = PETSC_TRUE; 1563 b->nonew = 0; 1564 b->diag = 0; 1565 b->solve_work = 0; 1566 b->mult_work = 0; 1567 B->spptr = 0; 1568 b->keepzeroedrows = PETSC_FALSE; 1569 b->xtoy = 0; 1570 b->XtoY = 0; 1571 1572 b->inew = 0; 1573 b->jnew = 0; 1574 b->anew = 0; 1575 b->a2anew = 0; 1576 b->permute = PETSC_FALSE; 1577 1578 b->ignore_ltriangular = PETSC_FALSE; 1579 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_ignore_lower_triangular",&flg);CHKERRQ(ierr); 1580 if (flg) b->ignore_ltriangular = PETSC_TRUE; 1581 1582 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1583 "MatStoreValues_SeqSBAIJ", 1584 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1585 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1586 "MatRetrieveValues_SeqSBAIJ", 1587 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1588 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1589 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1590 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1591 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C", 1592 "MatConvert_SeqSBAIJ_SeqAIJ", 1593 MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr); 1594 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C", 1595 "MatConvert_SeqSBAIJ_SeqBAIJ", 1596 MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr); 1597 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1598 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1599 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1600 1601 B->symmetric = PETSC_TRUE; 1602 B->structurally_symmetric = PETSC_TRUE; 1603 B->symmetric_set = PETSC_TRUE; 1604 B->structurally_symmetric_set = PETSC_TRUE; 1605 PetscFunctionReturn(0); 1606 } 1607 EXTERN_C_END 1608 1609 #undef __FUNCT__ 1610 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1611 /*@C 1612 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1613 compressed row) format. For good matrix assembly performance the 1614 user should preallocate the matrix storage by setting the parameter nz 1615 (or the array nnz). By setting these parameters accurately, performance 1616 during matrix assembly can be increased by more than a factor of 50. 1617 1618 Collective on Mat 1619 1620 Input Parameters: 1621 + A - the symmetric matrix 1622 . bs - size of block 1623 . nz - number of block nonzeros per block row (same for all rows) 1624 - nnz - array containing the number of block nonzeros in the upper triangular plus 1625 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1626 1627 Options Database Keys: 1628 . -mat_no_unroll - uses code that does not unroll the loops in the 1629 block calculations (much slower) 1630 . -mat_block_size - size of the blocks to use 1631 1632 Level: intermediate 1633 1634 Notes: 1635 Specify the preallocated storage with either nz or nnz (not both). 1636 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1637 allocation. For additional details, see the users manual chapter on 1638 matrices. 1639 1640 If the nnz parameter is given then the nz parameter is ignored 1641 1642 1643 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1644 @*/ 1645 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 1646 { 1647 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 1648 1649 PetscFunctionBegin; 1650 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1651 if (f) { 1652 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1653 } 1654 PetscFunctionReturn(0); 1655 } 1656 1657 #undef __FUNCT__ 1658 #define __FUNCT__ "MatCreateSeqSBAIJ" 1659 /*@C 1660 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1661 compressed row) format. For good matrix assembly performance the 1662 user should preallocate the matrix storage by setting the parameter nz 1663 (or the array nnz). By setting these parameters accurately, performance 1664 during matrix assembly can be increased by more than a factor of 50. 1665 1666 Collective on MPI_Comm 1667 1668 Input Parameters: 1669 + comm - MPI communicator, set to PETSC_COMM_SELF 1670 . bs - size of block 1671 . m - number of rows, or number of columns 1672 . nz - number of block nonzeros per block row (same for all rows) 1673 - nnz - array containing the number of block nonzeros in the upper triangular plus 1674 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1675 1676 Output Parameter: 1677 . A - the symmetric matrix 1678 1679 Options Database Keys: 1680 . -mat_no_unroll - uses code that does not unroll the loops in the 1681 block calculations (much slower) 1682 . -mat_block_size - size of the blocks to use 1683 1684 Level: intermediate 1685 1686 Notes: 1687 1688 Specify the preallocated storage with either nz or nnz (not both). 1689 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1690 allocation. For additional details, see the users manual chapter on 1691 matrices. 1692 1693 If the nnz parameter is given then the nz parameter is ignored 1694 1695 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1696 @*/ 1697 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1698 { 1699 PetscErrorCode ierr; 1700 1701 PetscFunctionBegin; 1702 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1703 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 1704 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1705 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 1706 PetscFunctionReturn(0); 1707 } 1708 1709 #undef __FUNCT__ 1710 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1711 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1712 { 1713 Mat C; 1714 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1715 PetscErrorCode ierr; 1716 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1717 1718 PetscFunctionBegin; 1719 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1720 1721 *B = 0; 1722 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 1723 ierr = MatSetSizes(C,A->m,A->n,A->m,A->n);CHKERRQ(ierr); 1724 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1725 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1726 c = (Mat_SeqSBAIJ*)C->data; 1727 1728 C->preallocated = PETSC_TRUE; 1729 C->factor = A->factor; 1730 c->row = 0; 1731 c->icol = 0; 1732 c->saved_values = 0; 1733 c->keepzeroedrows = a->keepzeroedrows; 1734 C->assembled = PETSC_TRUE; 1735 1736 C->M = A->M; 1737 C->N = A->N; 1738 C->bs = A->bs; 1739 c->bs2 = a->bs2; 1740 c->mbs = a->mbs; 1741 c->nbs = a->nbs; 1742 1743 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr); 1744 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr); 1745 for (i=0; i<mbs; i++) { 1746 c->imax[i] = a->imax[i]; 1747 c->ilen[i] = a->ilen[i]; 1748 } 1749 ierr = PetscLogObjectMemory(C,2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1750 1751 /* allocate the matrix space */ 1752 ierr = PetscMalloc3(bs2*nz,MatScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 1753 c->singlemalloc = PETSC_TRUE; 1754 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1755 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)));CHKERRQ(ierr); 1756 if (mbs > 0) { 1757 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1758 if (cpvalues == MAT_COPY_VALUES) { 1759 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1760 } else { 1761 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1762 } 1763 } 1764 1765 c->sorted = a->sorted; 1766 c->roworiented = a->roworiented; 1767 c->nonew = a->nonew; 1768 1769 if (a->diag) { 1770 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 1771 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1772 for (i=0; i<mbs; i++) { 1773 c->diag[i] = a->diag[i]; 1774 } 1775 } else c->diag = 0; 1776 c->nz = a->nz; 1777 c->maxnz = a->maxnz; 1778 c->solve_work = 0; 1779 c->mult_work = 0; 1780 *B = C; 1781 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1782 PetscFunctionReturn(0); 1783 } 1784 1785 #undef __FUNCT__ 1786 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1787 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer, MatType type,Mat *A) 1788 { 1789 Mat_SeqSBAIJ *a; 1790 Mat B; 1791 PetscErrorCode ierr; 1792 int fd; 1793 PetscMPIInt size; 1794 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 1795 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1796 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 1797 PetscInt *masked,nmask,tmp,bs2,ishift; 1798 PetscScalar *aa; 1799 MPI_Comm comm = ((PetscObject)viewer)->comm; 1800 1801 PetscFunctionBegin; 1802 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1803 bs2 = bs*bs; 1804 1805 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1806 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1807 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1808 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1809 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1810 M = header[1]; N = header[2]; nz = header[3]; 1811 1812 if (header[3] < 0) { 1813 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1814 } 1815 1816 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1817 1818 /* 1819 This code adds extra rows to make sure the number of rows is 1820 divisible by the blocksize 1821 */ 1822 mbs = M/bs; 1823 extra_rows = bs - M + bs*(mbs); 1824 if (extra_rows == bs) extra_rows = 0; 1825 else mbs++; 1826 if (extra_rows) { 1827 ierr = PetscLogInfo((0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr); 1828 } 1829 1830 /* read in row lengths */ 1831 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1832 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1833 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1834 1835 /* read in column indices */ 1836 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1837 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1838 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1839 1840 /* loop over row lengths determining block row lengths */ 1841 ierr = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr); 1842 ierr = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1843 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 1844 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1845 masked = mask + mbs; 1846 rowcount = 0; nzcount = 0; 1847 for (i=0; i<mbs; i++) { 1848 nmask = 0; 1849 for (j=0; j<bs; j++) { 1850 kmax = rowlengths[rowcount]; 1851 for (k=0; k<kmax; k++) { 1852 tmp = jj[nzcount++]/bs; /* block col. index */ 1853 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1854 } 1855 rowcount++; 1856 } 1857 s_browlengths[i] += nmask; 1858 1859 /* zero out the mask elements we set */ 1860 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1861 } 1862 1863 /* create our matrix */ 1864 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 1865 ierr = MatSetSizes(B,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 1866 ierr = MatSetType(B,type);CHKERRQ(ierr); 1867 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,s_browlengths);CHKERRQ(ierr); 1868 a = (Mat_SeqSBAIJ*)B->data; 1869 1870 /* set matrix "i" values */ 1871 a->i[0] = 0; 1872 for (i=1; i<= mbs; i++) { 1873 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1874 a->ilen[i-1] = s_browlengths[i-1]; 1875 } 1876 a->nz = a->i[mbs]; 1877 1878 /* read in nonzero values */ 1879 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1880 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1881 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1882 1883 /* set "a" and "j" values into matrix */ 1884 nzcount = 0; jcount = 0; 1885 for (i=0; i<mbs; i++) { 1886 nzcountb = nzcount; 1887 nmask = 0; 1888 for (j=0; j<bs; j++) { 1889 kmax = rowlengths[i*bs+j]; 1890 for (k=0; k<kmax; k++) { 1891 tmp = jj[nzcount++]/bs; /* block col. index */ 1892 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1893 } 1894 } 1895 /* sort the masked values */ 1896 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1897 1898 /* set "j" values into matrix */ 1899 maskcount = 1; 1900 for (j=0; j<nmask; j++) { 1901 a->j[jcount++] = masked[j]; 1902 mask[masked[j]] = maskcount++; 1903 } 1904 1905 /* set "a" values into matrix */ 1906 ishift = bs2*a->i[i]; 1907 for (j=0; j<bs; j++) { 1908 kmax = rowlengths[i*bs+j]; 1909 for (k=0; k<kmax; k++) { 1910 tmp = jj[nzcountb]/bs ; /* block col. index */ 1911 if (tmp >= i){ 1912 block = mask[tmp] - 1; 1913 point = jj[nzcountb] - bs*tmp; 1914 idx = ishift + bs2*block + j + bs*point; 1915 a->a[idx] = aa[nzcountb]; 1916 } 1917 nzcountb++; 1918 } 1919 } 1920 /* zero out the mask elements we set */ 1921 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1922 } 1923 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1924 1925 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1926 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1927 ierr = PetscFree(aa);CHKERRQ(ierr); 1928 ierr = PetscFree(jj);CHKERRQ(ierr); 1929 ierr = PetscFree(mask);CHKERRQ(ierr); 1930 1931 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1932 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1933 ierr = MatView_Private(B);CHKERRQ(ierr); 1934 *A = B; 1935 PetscFunctionReturn(0); 1936 } 1937 1938 #undef __FUNCT__ 1939 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1940 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1941 { 1942 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1943 MatScalar *aa=a->a,*v,*v1; 1944 PetscScalar *x,*b,*t,sum,d; 1945 PetscErrorCode ierr; 1946 PetscInt m=a->mbs,bs=A->bs,*ai=a->i,*aj=a->j; 1947 PetscInt nz,nz1,*vj,*vj1,i; 1948 1949 PetscFunctionBegin; 1950 its = its*lits; 1951 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1952 1953 if (bs > 1) 1954 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1955 1956 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1957 if (xx != bb) { 1958 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1959 } else { 1960 b = x; 1961 } 1962 1963 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1964 1965 if (flag & SOR_ZERO_INITIAL_GUESS) { 1966 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1967 for (i=0; i<m; i++) 1968 t[i] = b[i]; 1969 1970 for (i=0; i<m; i++){ 1971 d = *(aa + ai[i]); /* diag[i] */ 1972 v = aa + ai[i] + 1; 1973 vj = aj + ai[i] + 1; 1974 nz = ai[i+1] - ai[i] - 1; 1975 x[i] = omega*t[i]/d; 1976 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 1977 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 1978 } 1979 } 1980 1981 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1982 for (i=0; i<m; i++) 1983 t[i] = b[i]; 1984 1985 for (i=0; i<m-1; i++){ /* update rhs */ 1986 v = aa + ai[i] + 1; 1987 vj = aj + ai[i] + 1; 1988 nz = ai[i+1] - ai[i] - 1; 1989 while (nz--) t[*vj++] -= x[i]*(*v++); 1990 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 1991 } 1992 for (i=m-1; i>=0; i--){ 1993 d = *(aa + ai[i]); 1994 v = aa + ai[i] + 1; 1995 vj = aj + ai[i] + 1; 1996 nz = ai[i+1] - ai[i] - 1; 1997 sum = t[i]; 1998 while (nz--) sum -= x[*vj++]*(*v++); 1999 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2000 x[i] = (1-omega)*x[i] + omega*sum/d; 2001 } 2002 } 2003 its--; 2004 } 2005 2006 while (its--) { 2007 /* 2008 forward sweep: 2009 for i=0,...,m-1: 2010 sum[i] = (b[i] - U(i,:)x )/d[i]; 2011 x[i] = (1-omega)x[i] + omega*sum[i]; 2012 b = b - x[i]*U^T(i,:); 2013 2014 */ 2015 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2016 for (i=0; i<m; i++) 2017 t[i] = b[i]; 2018 2019 for (i=0; i<m; i++){ 2020 d = *(aa + ai[i]); /* diag[i] */ 2021 v = aa + ai[i] + 1; v1=v; 2022 vj = aj + ai[i] + 1; vj1=vj; 2023 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2024 sum = t[i]; 2025 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2026 x[i] = (1-omega)*x[i] + omega*sum/d; 2027 while (nz--) t[*vj++] -= x[i]*(*v++); 2028 ierr = PetscLogFlops(4*nz-2);CHKERRQ(ierr); 2029 } 2030 } 2031 2032 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2033 /* 2034 backward sweep: 2035 b = b - x[i]*U^T(i,:), i=0,...,n-2 2036 for i=m-1,...,0: 2037 sum[i] = (b[i] - U(i,:)x )/d[i]; 2038 x[i] = (1-omega)x[i] + omega*sum[i]; 2039 */ 2040 for (i=0; i<m; i++) 2041 t[i] = b[i]; 2042 2043 for (i=0; i<m-1; i++){ /* update rhs */ 2044 v = aa + ai[i] + 1; 2045 vj = aj + ai[i] + 1; 2046 nz = ai[i+1] - ai[i] - 1; 2047 while (nz--) t[*vj++] -= x[i]*(*v++); 2048 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2049 } 2050 for (i=m-1; i>=0; i--){ 2051 d = *(aa + ai[i]); 2052 v = aa + ai[i] + 1; 2053 vj = aj + ai[i] + 1; 2054 nz = ai[i+1] - ai[i] - 1; 2055 sum = t[i]; 2056 while (nz--) sum -= x[*vj++]*(*v++); 2057 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2058 x[i] = (1-omega)*x[i] + omega*sum/d; 2059 } 2060 } 2061 } 2062 2063 ierr = PetscFree(t);CHKERRQ(ierr); 2064 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2065 if (bb != xx) { 2066 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2067 } 2068 PetscFunctionReturn(0); 2069 } 2070 2071 2072 2073 2074 2075 2076