1 2 /* 3 Defines the basic matrix operations for the SBAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 7 #include "src/inline/spops.h" 8 #include "src/mat/impls/sbaij/seq/sbaij.h" 9 10 #define CHUNKSIZE 10 11 12 /* 13 Checks for missing diagonals 14 */ 15 #undef __FUNCT__ 16 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 17 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A) 18 { 19 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 20 PetscErrorCode ierr; 21 int *diag,*jj = a->j,i; 22 23 PetscFunctionBegin; 24 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 25 diag = a->diag; 26 for (i=0; i<a->mbs; i++) { 27 if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i); 28 } 29 PetscFunctionReturn(0); 30 } 31 32 #undef __FUNCT__ 33 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 34 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A) 35 { 36 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 37 PetscErrorCode ierr; 38 int i,mbs = a->mbs; 39 40 PetscFunctionBegin; 41 if (a->diag) PetscFunctionReturn(0); 42 43 ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr); 44 PetscLogObjectMemory(A,(mbs+1)*sizeof(int)); 45 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 46 PetscFunctionReturn(0); 47 } 48 49 #undef __FUNCT__ 50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 51 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 52 { 53 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 54 int n = a->mbs,i; 55 56 PetscFunctionBegin; 57 *nn = n; 58 if (!ia) PetscFunctionReturn(0); 59 60 if (oshift == 1) { 61 /* temporarily add 1 to i and j indices */ 62 int nz = a->i[n]; 63 for (i=0; i<nz; i++) a->j[i]++; 64 for (i=0; i<n+1; i++) a->i[i]++; 65 *ia = a->i; *ja = a->j; 66 } else { 67 *ia = a->i; *ja = a->j; 68 } 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 74 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 75 { 76 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 77 int i,n = a->mbs; 78 79 PetscFunctionBegin; 80 if (!ia) PetscFunctionReturn(0); 81 82 if (oshift == 1) { 83 int nz = a->i[n]-1; 84 for (i=0; i<nz; i++) a->j[i]--; 85 for (i=0; i<n+1; i++) a->i[i]--; 86 } 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ" 92 PetscErrorCode MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs) 93 { 94 Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data; 95 96 PetscFunctionBegin; 97 *bs = sbaij->bs; 98 PetscFunctionReturn(0); 99 } 100 101 #undef __FUNCT__ 102 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 103 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A) 104 { 105 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 106 PetscErrorCode ierr; 107 108 PetscFunctionBegin; 109 #if defined(PETSC_USE_LOG) 110 PetscLogObjectState((PetscObject)A,"Rows=%d, NZ=%d",A->m,a->nz); 111 #endif 112 ierr = PetscFree(a->a);CHKERRQ(ierr); 113 if (!a->singlemalloc) { 114 ierr = PetscFree(a->i);CHKERRQ(ierr); 115 ierr = PetscFree(a->j);CHKERRQ(ierr); 116 } 117 if (a->row) { 118 ierr = ISDestroy(a->row);CHKERRQ(ierr); 119 } 120 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 121 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 122 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 123 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 124 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 125 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 126 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 127 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 128 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 129 130 if (a->inew){ 131 ierr = PetscFree(a->inew);CHKERRQ(ierr); 132 a->inew = 0; 133 } 134 ierr = PetscFree(a);CHKERRQ(ierr); 135 136 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 137 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 138 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 139 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 140 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr); 141 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 142 PetscFunctionReturn(0); 143 } 144 145 #undef __FUNCT__ 146 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 147 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op) 148 { 149 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 150 151 PetscFunctionBegin; 152 switch (op) { 153 case MAT_ROW_ORIENTED: 154 a->roworiented = PETSC_TRUE; 155 break; 156 case MAT_COLUMN_ORIENTED: 157 a->roworiented = PETSC_FALSE; 158 break; 159 case MAT_COLUMNS_SORTED: 160 a->sorted = PETSC_TRUE; 161 break; 162 case MAT_COLUMNS_UNSORTED: 163 a->sorted = PETSC_FALSE; 164 break; 165 case MAT_KEEP_ZEROED_ROWS: 166 a->keepzeroedrows = PETSC_TRUE; 167 break; 168 case MAT_NO_NEW_NONZERO_LOCATIONS: 169 a->nonew = 1; 170 break; 171 case MAT_NEW_NONZERO_LOCATION_ERR: 172 a->nonew = -1; 173 break; 174 case MAT_NEW_NONZERO_ALLOCATION_ERR: 175 a->nonew = -2; 176 break; 177 case MAT_YES_NEW_NONZERO_LOCATIONS: 178 a->nonew = 0; 179 break; 180 case MAT_ROWS_SORTED: 181 case MAT_ROWS_UNSORTED: 182 case MAT_YES_NEW_DIAGONALS: 183 case MAT_IGNORE_OFF_PROC_ENTRIES: 184 case MAT_USE_HASH_TABLE: 185 PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n"); 186 break; 187 case MAT_NO_NEW_DIAGONALS: 188 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 189 case MAT_NOT_SYMMETRIC: 190 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 191 case MAT_HERMITIAN: 192 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 193 case MAT_SYMMETRIC: 194 case MAT_STRUCTURALLY_SYMMETRIC: 195 case MAT_NOT_HERMITIAN: 196 case MAT_SYMMETRY_ETERNAL: 197 case MAT_NOT_SYMMETRY_ETERNAL: 198 break; 199 default: 200 SETERRQ(PETSC_ERR_SUP,"unknown option"); 201 } 202 PetscFunctionReturn(0); 203 } 204 205 #undef __FUNCT__ 206 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 207 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v) 208 { 209 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 210 PetscErrorCode ierr; 211 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2; 212 MatScalar *aa,*aa_i; 213 PetscScalar *v_i; 214 215 PetscFunctionBegin; 216 bs = a->bs; 217 ai = a->i; 218 aj = a->j; 219 aa = a->a; 220 bs2 = a->bs2; 221 222 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %d out of range", row); 223 224 bn = row/bs; /* Block number */ 225 bp = row % bs; /* Block position */ 226 M = ai[bn+1] - ai[bn]; 227 *ncols = bs*M; 228 229 if (v) { 230 *v = 0; 231 if (*ncols) { 232 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 233 for (i=0; i<M; i++) { /* for each block in the block row */ 234 v_i = *v + i*bs; 235 aa_i = aa + bs2*(ai[bn] + i); 236 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 237 } 238 } 239 } 240 241 if (cols) { 242 *cols = 0; 243 if (*ncols) { 244 ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr); 245 for (i=0; i<M; i++) { /* for each block in the block row */ 246 cols_i = *cols + i*bs; 247 itmp = bs*aj[ai[bn] + i]; 248 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 249 } 250 } 251 } 252 253 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 254 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 255 #ifdef column_search 256 v_i = *v + M*bs; 257 cols_i = *cols + M*bs; 258 for (i=0; i<bn; i++){ /* for each block row */ 259 M = ai[i+1] - ai[i]; 260 for (j=0; j<M; j++){ 261 itmp = aj[ai[i] + j]; /* block column value */ 262 if (itmp == bn){ 263 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 264 for (k=0; k<bs; k++) { 265 *cols_i++ = i*bs+k; 266 *v_i++ = aa_i[k]; 267 } 268 *ncols += bs; 269 break; 270 } 271 } 272 } 273 #endif 274 275 PetscFunctionReturn(0); 276 } 277 278 #undef __FUNCT__ 279 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 280 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 281 { 282 PetscErrorCode ierr; 283 284 PetscFunctionBegin; 285 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 286 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 287 PetscFunctionReturn(0); 288 } 289 290 #undef __FUNCT__ 291 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 292 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B) 293 { 294 PetscErrorCode ierr; 295 PetscFunctionBegin; 296 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 297 PetscFunctionReturn(0); 298 } 299 300 #undef __FUNCT__ 301 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 302 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 303 { 304 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 305 PetscErrorCode ierr; 306 int i,j,bs = a->bs,k,l,bs2=a->bs2; 307 char *name; 308 PetscViewerFormat format; 309 310 PetscFunctionBegin; 311 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 312 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 313 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 314 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 315 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 316 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 317 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 318 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 319 for (i=0; i<a->mbs; i++) { 320 for (j=0; j<bs; j++) { 321 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 322 for (k=a->i[i]; k<a->i[i+1]; k++) { 323 for (l=0; l<bs; l++) { 324 #if defined(PETSC_USE_COMPLEX) 325 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 326 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 327 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 328 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 329 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 330 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 331 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 332 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 333 } 334 #else 335 if (a->a[bs2*k + l*bs + j] != 0.0) { 336 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 337 } 338 #endif 339 } 340 } 341 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 342 } 343 } 344 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 345 } else { 346 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 347 for (i=0; i<a->mbs; i++) { 348 for (j=0; j<bs; j++) { 349 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 350 for (k=a->i[i]; k<a->i[i+1]; k++) { 351 for (l=0; l<bs; l++) { 352 #if defined(PETSC_USE_COMPLEX) 353 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 354 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 355 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 356 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 357 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 358 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 359 } else { 360 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 361 } 362 #else 363 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 364 #endif 365 } 366 } 367 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 368 } 369 } 370 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 371 } 372 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 373 PetscFunctionReturn(0); 374 } 375 376 #undef __FUNCT__ 377 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 378 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 379 { 380 Mat A = (Mat) Aa; 381 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 382 PetscErrorCode ierr; 383 int row,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank; 384 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 385 MatScalar *aa; 386 MPI_Comm comm; 387 PetscViewer viewer; 388 389 PetscFunctionBegin; 390 /* 391 This is nasty. If this is called from an originally parallel matrix 392 then all processes call this,but only the first has the matrix so the 393 rest should return immediately. 394 */ 395 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 396 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 397 if (rank) PetscFunctionReturn(0); 398 399 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 400 401 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 402 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 403 404 /* loop over matrix elements drawing boxes */ 405 color = PETSC_DRAW_BLUE; 406 for (i=0,row=0; i<mbs; i++,row+=bs) { 407 for (j=a->i[i]; j<a->i[i+1]; j++) { 408 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 409 x_l = a->j[j]*bs; x_r = x_l + 1.0; 410 aa = a->a + j*bs2; 411 for (k=0; k<bs; k++) { 412 for (l=0; l<bs; l++) { 413 if (PetscRealPart(*aa++) >= 0.) continue; 414 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 415 } 416 } 417 } 418 } 419 color = PETSC_DRAW_CYAN; 420 for (i=0,row=0; i<mbs; i++,row+=bs) { 421 for (j=a->i[i]; j<a->i[i+1]; j++) { 422 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 423 x_l = a->j[j]*bs; x_r = x_l + 1.0; 424 aa = a->a + j*bs2; 425 for (k=0; k<bs; k++) { 426 for (l=0; l<bs; l++) { 427 if (PetscRealPart(*aa++) != 0.) continue; 428 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 429 } 430 } 431 } 432 } 433 434 color = PETSC_DRAW_RED; 435 for (i=0,row=0; i<mbs; i++,row+=bs) { 436 for (j=a->i[i]; j<a->i[i+1]; j++) { 437 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 438 x_l = a->j[j]*bs; x_r = x_l + 1.0; 439 aa = a->a + j*bs2; 440 for (k=0; k<bs; k++) { 441 for (l=0; l<bs; l++) { 442 if (PetscRealPart(*aa++) <= 0.) continue; 443 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 444 } 445 } 446 } 447 } 448 PetscFunctionReturn(0); 449 } 450 451 #undef __FUNCT__ 452 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 453 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 454 { 455 PetscErrorCode ierr; 456 PetscReal xl,yl,xr,yr,w,h; 457 PetscDraw draw; 458 PetscTruth isnull; 459 460 PetscFunctionBegin; 461 462 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 463 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 464 465 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 466 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 467 xr += w; yr += h; xl = -w; yl = -h; 468 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 469 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 470 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 471 PetscFunctionReturn(0); 472 } 473 474 #undef __FUNCT__ 475 #define __FUNCT__ "MatView_SeqSBAIJ" 476 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 477 { 478 PetscErrorCode ierr; 479 PetscTruth iascii,isdraw; 480 481 PetscFunctionBegin; 482 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 483 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 484 if (iascii){ 485 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 486 } else if (isdraw) { 487 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 488 } else { 489 Mat B; 490 ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr); 491 ierr = MatView(B,viewer);CHKERRQ(ierr); 492 ierr = MatDestroy(B);CHKERRQ(ierr); 493 } 494 PetscFunctionReturn(0); 495 } 496 497 498 #undef __FUNCT__ 499 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 500 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[]) 501 { 502 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 503 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 504 int *ai = a->i,*ailen = a->ilen; 505 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 506 MatScalar *ap,*aa = a->a,zero = 0.0; 507 508 PetscFunctionBegin; 509 for (k=0; k<m; k++) { /* loop over rows */ 510 row = im[k]; brow = row/bs; 511 if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",row); 512 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1); 513 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 514 nrow = ailen[brow]; 515 for (l=0; l<n; l++) { /* loop over columns */ 516 if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",in[l]); 517 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1); 518 col = in[l] ; 519 bcol = col/bs; 520 cidx = col%bs; 521 ridx = row%bs; 522 high = nrow; 523 low = 0; /* assume unsorted */ 524 while (high-low > 5) { 525 t = (low+high)/2; 526 if (rp[t] > bcol) high = t; 527 else low = t; 528 } 529 for (i=low; i<high; i++) { 530 if (rp[i] > bcol) break; 531 if (rp[i] == bcol) { 532 *v++ = ap[bs2*i+bs*cidx+ridx]; 533 goto finished; 534 } 535 } 536 *v++ = zero; 537 finished:; 538 } 539 } 540 PetscFunctionReturn(0); 541 } 542 543 544 #undef __FUNCT__ 545 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 546 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 547 { 548 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 549 PetscErrorCode ierr; 550 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 551 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 552 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval; 553 PetscTruth roworiented=a->roworiented; 554 const MatScalar *value = v; 555 MatScalar *ap,*aa = a->a,*bap; 556 557 PetscFunctionBegin; 558 if (roworiented) { 559 stepval = (n-1)*bs; 560 } else { 561 stepval = (m-1)*bs; 562 } 563 for (k=0; k<m; k++) { /* loop over added rows */ 564 row = im[k]; 565 if (row < 0) continue; 566 #if defined(PETSC_USE_BOPT_g) 567 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1); 568 #endif 569 rp = aj + ai[row]; 570 ap = aa + bs2*ai[row]; 571 rmax = imax[row]; 572 nrow = ailen[row]; 573 low = 0; 574 for (l=0; l<n; l++) { /* loop over added columns */ 575 if (in[l] < 0) continue; 576 col = in[l]; 577 #if defined(PETSC_USE_BOPT_g) 578 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",col,a->nbs-1); 579 #endif 580 if (col < row) continue; /* ignore lower triangular block */ 581 if (roworiented) { 582 value = v + k*(stepval+bs)*bs + l*bs; 583 } else { 584 value = v + l*(stepval+bs)*bs + k*bs; 585 } 586 if (!sorted) low = 0; high = nrow; 587 while (high-low > 7) { 588 t = (low+high)/2; 589 if (rp[t] > col) high = t; 590 else low = t; 591 } 592 for (i=low; i<high; i++) { 593 if (rp[i] > col) break; 594 if (rp[i] == col) { 595 bap = ap + bs2*i; 596 if (roworiented) { 597 if (is == ADD_VALUES) { 598 for (ii=0; ii<bs; ii++,value+=stepval) { 599 for (jj=ii; jj<bs2; jj+=bs) { 600 bap[jj] += *value++; 601 } 602 } 603 } else { 604 for (ii=0; ii<bs; ii++,value+=stepval) { 605 for (jj=ii; jj<bs2; jj+=bs) { 606 bap[jj] = *value++; 607 } 608 } 609 } 610 } else { 611 if (is == ADD_VALUES) { 612 for (ii=0; ii<bs; ii++,value+=stepval) { 613 for (jj=0; jj<bs; jj++) { 614 *bap++ += *value++; 615 } 616 } 617 } else { 618 for (ii=0; ii<bs; ii++,value+=stepval) { 619 for (jj=0; jj<bs; jj++) { 620 *bap++ = *value++; 621 } 622 } 623 } 624 } 625 goto noinsert2; 626 } 627 } 628 if (nonew == 1) goto noinsert2; 629 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 630 if (nrow >= rmax) { 631 /* there is no extra room in row, therefore enlarge */ 632 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 633 MatScalar *new_a; 634 635 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 636 637 /* malloc new storage space */ 638 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 639 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 640 new_j = (int*)(new_a + bs2*new_nz); 641 new_i = new_j + new_nz; 642 643 /* copy over old data into new slots */ 644 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 645 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 646 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 647 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 648 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 649 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 650 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 651 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 652 /* free up old matrix storage */ 653 ierr = PetscFree(a->a);CHKERRQ(ierr); 654 if (!a->singlemalloc) { 655 ierr = PetscFree(a->i);CHKERRQ(ierr); 656 ierr = PetscFree(a->j);CHKERRQ(ierr); 657 } 658 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 659 a->singlemalloc = PETSC_TRUE; 660 661 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 662 rmax = imax[row] = imax[row] + CHUNKSIZE; 663 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 664 a->maxnz += bs2*CHUNKSIZE; 665 a->reallocs++; 666 a->nz++; 667 } 668 N = nrow++ - 1; 669 /* shift up all the later entries in this row */ 670 for (ii=N; ii>=i; ii--) { 671 rp[ii+1] = rp[ii]; 672 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 673 } 674 if (N >= i) { 675 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 676 } 677 rp[i] = col; 678 bap = ap + bs2*i; 679 if (roworiented) { 680 for (ii=0; ii<bs; ii++,value+=stepval) { 681 for (jj=ii; jj<bs2; jj+=bs) { 682 bap[jj] = *value++; 683 } 684 } 685 } else { 686 for (ii=0; ii<bs; ii++,value+=stepval) { 687 for (jj=0; jj<bs; jj++) { 688 *bap++ = *value++; 689 } 690 } 691 } 692 noinsert2:; 693 low = i; 694 } 695 ailen[row] = nrow; 696 } 697 PetscFunctionReturn(0); 698 } 699 700 #undef __FUNCT__ 701 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 702 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 703 { 704 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 705 PetscErrorCode ierr; 706 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 707 int m = A->m,*ip,N,*ailen = a->ilen; 708 int mbs = a->mbs,bs2 = a->bs2,rmax = 0; 709 MatScalar *aa = a->a,*ap; 710 711 PetscFunctionBegin; 712 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 713 714 if (m) rmax = ailen[0]; 715 for (i=1; i<mbs; i++) { 716 /* move each row back by the amount of empty slots (fshift) before it*/ 717 fshift += imax[i-1] - ailen[i-1]; 718 rmax = PetscMax(rmax,ailen[i]); 719 if (fshift) { 720 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 721 N = ailen[i]; 722 for (j=0; j<N; j++) { 723 ip[j-fshift] = ip[j]; 724 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 725 } 726 } 727 ai[i] = ai[i-1] + ailen[i-1]; 728 } 729 if (mbs) { 730 fshift += imax[mbs-1] - ailen[mbs-1]; 731 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 732 } 733 /* reset ilen and imax for each row */ 734 for (i=0; i<mbs; i++) { 735 ailen[i] = imax[i] = ai[i+1] - ai[i]; 736 } 737 a->nz = ai[mbs]; 738 739 /* diagonals may have moved, reset it */ 740 if (a->diag) { 741 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr); 742 } 743 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 744 m,A->m,a->bs,fshift*bs2,a->nz*bs2); 745 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n", 746 a->reallocs); 747 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 748 a->reallocs = 0; 749 A->info.nz_unneeded = (PetscReal)fshift*bs2; 750 751 PetscFunctionReturn(0); 752 } 753 754 /* 755 This function returns an array of flags which indicate the locations of contiguous 756 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 757 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 758 Assume: sizes should be long enough to hold all the values. 759 */ 760 #undef __FUNCT__ 761 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 762 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 763 { 764 int i,j,k,row; 765 PetscTruth flg; 766 767 PetscFunctionBegin; 768 for (i=0,j=0; i<n; j++) { 769 row = idx[i]; 770 if (row%bs!=0) { /* Not the begining of a block */ 771 sizes[j] = 1; 772 i++; 773 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 774 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 775 i++; 776 } else { /* Begining of the block, so check if the complete block exists */ 777 flg = PETSC_TRUE; 778 for (k=1; k<bs; k++) { 779 if (row+k != idx[i+k]) { /* break in the block */ 780 flg = PETSC_FALSE; 781 break; 782 } 783 } 784 if (flg == PETSC_TRUE) { /* No break in the bs */ 785 sizes[j] = bs; 786 i+= bs; 787 } else { 788 sizes[j] = 1; 789 i++; 790 } 791 } 792 } 793 *bs_max = j; 794 PetscFunctionReturn(0); 795 } 796 797 #undef __FUNCT__ 798 #define __FUNCT__ "MatZeroRows_SeqSBAIJ" 799 PetscErrorCode MatZeroRows_SeqSBAIJ(Mat A,IS is,const PetscScalar *diag) 800 { 801 PetscFunctionBegin; 802 SETERRQ(PETSC_ERR_SUP,"No support for this function yet"); 803 } 804 805 /* Only add/insert a(i,j) with i<=j (blocks). 806 Any a(i,j) with i>j input by user is ingored. 807 */ 808 809 #undef __FUNCT__ 810 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 811 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 812 { 813 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 814 PetscErrorCode ierr; 815 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 816 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 817 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 818 int ridx,cidx,bs2=a->bs2; 819 MatScalar *ap,value,*aa=a->a,*bap; 820 821 PetscFunctionBegin; 822 823 for (k=0; k<m; k++) { /* loop over added rows */ 824 row = im[k]; /* row number */ 825 brow = row/bs; /* block row number */ 826 if (row < 0) continue; 827 #if defined(PETSC_USE_BOPT_g) 828 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1); 829 #endif 830 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 831 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 832 rmax = imax[brow]; /* maximum space allocated for this row */ 833 nrow = ailen[brow]; /* actual length of this row */ 834 low = 0; 835 836 for (l=0; l<n; l++) { /* loop over added columns */ 837 if (in[l] < 0) continue; 838 #if defined(PETSC_USE_BOPT_g) 839 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m-1); 840 #endif 841 col = in[l]; 842 bcol = col/bs; /* block col number */ 843 844 if (brow <= bcol){ 845 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 846 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 847 /* element value a(k,l) */ 848 if (roworiented) { 849 value = v[l + k*n]; 850 } else { 851 value = v[k + l*m]; 852 } 853 854 /* move pointer bap to a(k,l) quickly and add/insert value */ 855 if (!sorted) low = 0; high = nrow; 856 while (high-low > 7) { 857 t = (low+high)/2; 858 if (rp[t] > bcol) high = t; 859 else low = t; 860 } 861 for (i=low; i<high; i++) { 862 /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */ 863 if (rp[i] > bcol) break; 864 if (rp[i] == bcol) { 865 bap = ap + bs2*i + bs*cidx + ridx; 866 if (is == ADD_VALUES) *bap += value; 867 else *bap = value; 868 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 869 if (brow == bcol && ridx < cidx){ 870 bap = ap + bs2*i + bs*ridx + cidx; 871 if (is == ADD_VALUES) *bap += value; 872 else *bap = value; 873 } 874 goto noinsert1; 875 } 876 } 877 878 if (nonew == 1) goto noinsert1; 879 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 880 if (nrow >= rmax) { 881 /* there is no extra room in row, therefore enlarge */ 882 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 883 MatScalar *new_a; 884 885 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 886 887 /* Malloc new storage space */ 888 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 889 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 890 new_j = (int*)(new_a + bs2*new_nz); 891 new_i = new_j + new_nz; 892 893 /* copy over old data into new slots */ 894 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 895 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 896 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 897 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 898 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 899 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 900 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 901 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 902 /* free up old matrix storage */ 903 ierr = PetscFree(a->a);CHKERRQ(ierr); 904 if (!a->singlemalloc) { 905 ierr = PetscFree(a->i);CHKERRQ(ierr); 906 ierr = PetscFree(a->j);CHKERRQ(ierr); 907 } 908 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 909 a->singlemalloc = PETSC_TRUE; 910 911 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 912 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 913 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 914 a->maxnz += bs2*CHUNKSIZE; 915 a->reallocs++; 916 a->nz++; 917 } 918 919 N = nrow++ - 1; 920 /* shift up all the later entries in this row */ 921 for (ii=N; ii>=i; ii--) { 922 rp[ii+1] = rp[ii]; 923 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 924 } 925 if (N>=i) { 926 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 927 } 928 rp[i] = bcol; 929 ap[bs2*i + bs*cidx + ridx] = value; 930 noinsert1:; 931 low = i; 932 /* } */ 933 } 934 } /* end of if .. if.. */ 935 } /* end of loop over added columns */ 936 ailen[brow] = nrow; 937 } /* end of loop over added rows */ 938 939 PetscFunctionReturn(0); 940 } 941 942 EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*); 943 EXTERN PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*); 944 EXTERN PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS[],int); 945 EXTERN PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 946 EXTERN PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat*[]); 947 EXTERN PetscErrorCode MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec); 948 EXTERN PetscErrorCode MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec); 949 EXTERN PetscErrorCode MatScale_SeqSBAIJ(const PetscScalar*,Mat); 950 EXTERN PetscErrorCode MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 951 EXTERN PetscErrorCode MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 952 EXTERN PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat,Vec); 953 EXTERN PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 954 EXTERN PetscErrorCode MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 955 EXTERN PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat); 956 EXTERN PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat,Vec); 957 958 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 959 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 960 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 961 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 962 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 963 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 964 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 965 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 966 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec); 967 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec); 968 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec); 969 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec); 970 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec); 971 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec); 972 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec); 973 974 EXTERN PetscErrorCode MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs); 975 976 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 977 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 978 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 979 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 980 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 981 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 982 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 983 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 984 985 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*); 986 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*); 987 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*); 988 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*); 989 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*); 990 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*); 991 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*); 992 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*); 993 EXTERN PetscErrorCode MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*); 994 995 EXTERN PetscErrorCode MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 996 EXTERN PetscErrorCode MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 997 EXTERN PetscErrorCode MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 998 EXTERN PetscErrorCode MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 999 EXTERN PetscErrorCode MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 1000 EXTERN PetscErrorCode MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 1001 EXTERN PetscErrorCode MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 1002 EXTERN PetscErrorCode MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 1003 1004 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 1005 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 1006 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 1007 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 1008 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 1009 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 1010 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 1011 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 1012 1013 #ifdef HAVE_MatICCFactor 1014 /* modified from MatILUFactor_SeqSBAIJ, needs further work! */ 1015 #undef __FUNCT__ 1016 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 1017 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 1018 { 1019 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 1020 Mat outA; 1021 PetscErrorCode ierr; 1022 PetscTruth row_identity,col_identity; 1023 1024 PetscFunctionBegin; 1025 /* 1026 if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1027 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1028 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1029 if (!row_identity || !col_identity) { 1030 SETERRQ(1,"Row and column permutations must be identity for in-place ICC"); 1031 } 1032 */ 1033 1034 outA = inA; 1035 inA->factor = FACTOR_CHOLESKY; 1036 1037 if (!a->diag) { 1038 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 1039 } 1040 /* 1041 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1042 for ILU(0) factorization with natural ordering 1043 */ 1044 switch (a->bs) { 1045 case 1: 1046 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1047 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1048 inA->ops->solves = MatSolves_SeqSBAIJ_1; 1049 PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1050 case 2: 1051 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1052 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1053 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1054 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1055 break; 1056 case 3: 1057 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1058 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1059 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1060 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1061 break; 1062 case 4: 1063 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1064 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1065 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1066 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1067 break; 1068 case 5: 1069 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1070 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1071 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1072 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1073 break; 1074 case 6: 1075 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1076 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1077 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1078 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1079 break; 1080 case 7: 1081 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1082 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1083 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1084 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1085 break; 1086 default: 1087 a->row = row; 1088 a->icol = col; 1089 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1090 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1091 1092 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1093 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1094 PetscLogObjectParent(inA,a->icol); 1095 1096 if (!a->solve_work) { 1097 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1098 PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar)); 1099 } 1100 } 1101 1102 ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr); 1103 1104 PetscFunctionReturn(0); 1105 } 1106 #endif 1107 1108 #undef __FUNCT__ 1109 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1110 PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat A) 1111 { 1112 static PetscTruth called = PETSC_FALSE; 1113 MPI_Comm comm = A->comm; 1114 PetscErrorCode ierr; 1115 1116 PetscFunctionBegin; 1117 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1118 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1119 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1120 PetscFunctionReturn(0); 1121 } 1122 1123 EXTERN_C_BEGIN 1124 #undef __FUNCT__ 1125 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1126 PetscErrorCode MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices) 1127 { 1128 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1129 int i,nz,n; 1130 1131 PetscFunctionBegin; 1132 nz = baij->maxnz; 1133 n = mat->n; 1134 for (i=0; i<nz; i++) { 1135 baij->j[i] = indices[i]; 1136 } 1137 baij->nz = nz; 1138 for (i=0; i<n; i++) { 1139 baij->ilen[i] = baij->imax[i]; 1140 } 1141 1142 PetscFunctionReturn(0); 1143 } 1144 EXTERN_C_END 1145 1146 #undef __FUNCT__ 1147 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1148 /*@ 1149 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1150 in the matrix. 1151 1152 Input Parameters: 1153 + mat - the SeqSBAIJ matrix 1154 - indices - the column indices 1155 1156 Level: advanced 1157 1158 Notes: 1159 This can be called if you have precomputed the nonzero structure of the 1160 matrix and want to provide it to the matrix object to improve the performance 1161 of the MatSetValues() operation. 1162 1163 You MUST have set the correct numbers of nonzeros per row in the call to 1164 MatCreateSeqSBAIJ(). 1165 1166 MUST be called before any calls to MatSetValues() 1167 1168 .seealso: MatCreateSeqSBAIJ 1169 @*/ 1170 PetscErrorCode MatSeqSBAIJSetColumnIndices(Mat mat,int *indices) 1171 { 1172 PetscErrorCode ierr,(*f)(Mat,int *); 1173 1174 PetscFunctionBegin; 1175 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1176 PetscValidPointer(indices,2); 1177 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1178 if (f) { 1179 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1180 } else { 1181 SETERRQ(1,"Wrong type of matrix to set column indices"); 1182 } 1183 PetscFunctionReturn(0); 1184 } 1185 1186 #undef __FUNCT__ 1187 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1188 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A) 1189 { 1190 PetscErrorCode ierr; 1191 1192 PetscFunctionBegin; 1193 ierr = MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1194 PetscFunctionReturn(0); 1195 } 1196 1197 #undef __FUNCT__ 1198 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1199 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1200 { 1201 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1202 PetscFunctionBegin; 1203 *array = a->a; 1204 PetscFunctionReturn(0); 1205 } 1206 1207 #undef __FUNCT__ 1208 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1209 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1210 { 1211 PetscFunctionBegin; 1212 PetscFunctionReturn(0); 1213 } 1214 1215 #include "petscblaslapack.h" 1216 #undef __FUNCT__ 1217 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1218 PetscErrorCode MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1219 { 1220 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1221 PetscErrorCode ierr; 1222 int i,bs=y->bs,bs2,j; 1223 PetscBLASInt bnz = (PetscBLASInt)x->nz,one = 1; 1224 1225 PetscFunctionBegin; 1226 if (str == SAME_NONZERO_PATTERN) { 1227 BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1228 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1229 if (y->xtoy && y->XtoY != X) { 1230 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1231 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1232 } 1233 if (!y->xtoy) { /* get xtoy */ 1234 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1235 y->XtoY = X; 1236 } 1237 bs2 = bs*bs; 1238 for (i=0; i<x->nz; i++) { 1239 j = 0; 1240 while (j < bs2){ 1241 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1242 j++; 1243 } 1244 } 1245 PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)); 1246 } else { 1247 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1248 } 1249 PetscFunctionReturn(0); 1250 } 1251 1252 #undef __FUNCT__ 1253 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1254 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1255 { 1256 PetscFunctionBegin; 1257 *flg = PETSC_TRUE; 1258 PetscFunctionReturn(0); 1259 } 1260 1261 #undef __FUNCT__ 1262 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1263 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1264 { 1265 PetscFunctionBegin; 1266 *flg = PETSC_TRUE; 1267 PetscFunctionReturn(0); 1268 } 1269 1270 #undef __FUNCT__ 1271 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1272 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg) 1273 { 1274 PetscFunctionBegin; 1275 *flg = PETSC_FALSE; 1276 PetscFunctionReturn(0); 1277 } 1278 1279 /* -------------------------------------------------------------------*/ 1280 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1281 MatGetRow_SeqSBAIJ, 1282 MatRestoreRow_SeqSBAIJ, 1283 MatMult_SeqSBAIJ_N, 1284 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1285 MatMultTranspose_SeqSBAIJ, 1286 MatMultTransposeAdd_SeqSBAIJ, 1287 MatSolve_SeqSBAIJ_N, 1288 0, 1289 0, 1290 /*10*/ 0, 1291 0, 1292 MatCholeskyFactor_SeqSBAIJ, 1293 MatRelax_SeqSBAIJ, 1294 MatTranspose_SeqSBAIJ, 1295 /*15*/ MatGetInfo_SeqSBAIJ, 1296 MatEqual_SeqSBAIJ, 1297 MatGetDiagonal_SeqSBAIJ, 1298 MatDiagonalScale_SeqSBAIJ, 1299 MatNorm_SeqSBAIJ, 1300 /*20*/ 0, 1301 MatAssemblyEnd_SeqSBAIJ, 1302 0, 1303 MatSetOption_SeqSBAIJ, 1304 MatZeroEntries_SeqSBAIJ, 1305 /*25*/ MatZeroRows_SeqSBAIJ, 1306 0, 1307 0, 1308 MatCholeskyFactorSymbolic_SeqSBAIJ, 1309 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1310 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1311 0, 1312 MatICCFactorSymbolic_SeqSBAIJ, 1313 MatGetArray_SeqSBAIJ, 1314 MatRestoreArray_SeqSBAIJ, 1315 /*35*/ MatDuplicate_SeqSBAIJ, 1316 0, 1317 0, 1318 0, 1319 0, 1320 /*40*/ MatAXPY_SeqSBAIJ, 1321 MatGetSubMatrices_SeqSBAIJ, 1322 MatIncreaseOverlap_SeqSBAIJ, 1323 MatGetValues_SeqSBAIJ, 1324 0, 1325 /*45*/ MatPrintHelp_SeqSBAIJ, 1326 MatScale_SeqSBAIJ, 1327 0, 1328 0, 1329 0, 1330 /*50*/ MatGetBlockSize_SeqSBAIJ, 1331 MatGetRowIJ_SeqSBAIJ, 1332 MatRestoreRowIJ_SeqSBAIJ, 1333 0, 1334 0, 1335 /*55*/ 0, 1336 0, 1337 0, 1338 0, 1339 MatSetValuesBlocked_SeqSBAIJ, 1340 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1341 0, 1342 0, 1343 MatGetPetscMaps_Petsc, 1344 0, 1345 /*65*/ 0, 1346 0, 1347 0, 1348 0, 1349 0, 1350 /*70*/ MatGetRowMax_SeqSBAIJ, 1351 0, 1352 0, 1353 0, 1354 0, 1355 /*75*/ 0, 1356 0, 1357 0, 1358 0, 1359 0, 1360 /*80*/ 0, 1361 0, 1362 0, 1363 #if !defined(PETSC_USE_COMPLEX) 1364 MatGetInertia_SeqSBAIJ, 1365 #else 1366 0, 1367 #endif 1368 MatLoad_SeqSBAIJ, 1369 /*85*/ MatIsSymmetric_SeqSBAIJ, 1370 MatIsHermitian_SeqSBAIJ, 1371 MatIsStructurallySymmetric_SeqSBAIJ, 1372 0, 1373 0, 1374 /*90*/ 0, 1375 0, 1376 0, 1377 0, 1378 0, 1379 /*95*/ 0, 1380 0, 1381 0, 1382 0}; 1383 1384 EXTERN_C_BEGIN 1385 #undef __FUNCT__ 1386 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1387 PetscErrorCode MatStoreValues_SeqSBAIJ(Mat mat) 1388 { 1389 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1390 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1391 PetscErrorCode ierr; 1392 1393 PetscFunctionBegin; 1394 if (aij->nonew != 1) { 1395 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1396 } 1397 1398 /* allocate space for values if not already there */ 1399 if (!aij->saved_values) { 1400 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1401 } 1402 1403 /* copy values over */ 1404 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1405 PetscFunctionReturn(0); 1406 } 1407 EXTERN_C_END 1408 1409 EXTERN_C_BEGIN 1410 #undef __FUNCT__ 1411 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1412 PetscErrorCode MatRetrieveValues_SeqSBAIJ(Mat mat) 1413 { 1414 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1415 PetscErrorCode ierr; 1416 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1417 1418 PetscFunctionBegin; 1419 if (aij->nonew != 1) { 1420 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1421 } 1422 if (!aij->saved_values) { 1423 SETERRQ(1,"Must call MatStoreValues(A);first"); 1424 } 1425 1426 /* copy values over */ 1427 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1428 PetscFunctionReturn(0); 1429 } 1430 EXTERN_C_END 1431 1432 EXTERN_C_BEGIN 1433 #undef __FUNCT__ 1434 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1435 PetscErrorCode MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,int bs,int nz,int *nnz) 1436 { 1437 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1438 PetscErrorCode ierr; 1439 int i,len,mbs,bs2; 1440 PetscTruth flg; 1441 1442 PetscFunctionBegin; 1443 B->preallocated = PETSC_TRUE; 1444 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1445 mbs = B->m/bs; 1446 bs2 = bs*bs; 1447 1448 if (mbs*bs != B->m) { 1449 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1450 } 1451 1452 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1453 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1454 if (nnz) { 1455 for (i=0; i<mbs; i++) { 1456 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1457 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs); 1458 } 1459 } 1460 1461 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1462 if (!flg) { 1463 switch (bs) { 1464 case 1: 1465 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1466 B->ops->solve = MatSolve_SeqSBAIJ_1; 1467 B->ops->solves = MatSolves_SeqSBAIJ_1; 1468 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_1; 1469 B->ops->mult = MatMult_SeqSBAIJ_1; 1470 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1471 break; 1472 case 2: 1473 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1474 B->ops->solve = MatSolve_SeqSBAIJ_2; 1475 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_2; 1476 B->ops->mult = MatMult_SeqSBAIJ_2; 1477 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1478 break; 1479 case 3: 1480 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1481 B->ops->solve = MatSolve_SeqSBAIJ_3; 1482 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_3; 1483 B->ops->mult = MatMult_SeqSBAIJ_3; 1484 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1485 break; 1486 case 4: 1487 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1488 B->ops->solve = MatSolve_SeqSBAIJ_4; 1489 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_4; 1490 B->ops->mult = MatMult_SeqSBAIJ_4; 1491 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1492 break; 1493 case 5: 1494 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1495 B->ops->solve = MatSolve_SeqSBAIJ_5; 1496 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_5; 1497 B->ops->mult = MatMult_SeqSBAIJ_5; 1498 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1499 break; 1500 case 6: 1501 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1502 B->ops->solve = MatSolve_SeqSBAIJ_6; 1503 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_6; 1504 B->ops->mult = MatMult_SeqSBAIJ_6; 1505 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1506 break; 1507 case 7: 1508 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1509 B->ops->solve = MatSolve_SeqSBAIJ_7; 1510 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_7; 1511 B->ops->mult = MatMult_SeqSBAIJ_7; 1512 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1513 break; 1514 } 1515 } 1516 1517 b->mbs = mbs; 1518 b->nbs = mbs; 1519 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1520 if (!nnz) { 1521 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1522 else if (nz <= 0) nz = 1; 1523 for (i=0; i<mbs; i++) { 1524 b->imax[i] = nz; 1525 } 1526 nz = nz*mbs; /* total nz */ 1527 } else { 1528 nz = 0; 1529 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1530 } 1531 /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1532 1533 /* allocate the matrix space */ 1534 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1535 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1536 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1537 b->j = (int*)(b->a + nz*bs2); 1538 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 1539 b->i = b->j + nz; 1540 b->singlemalloc = PETSC_TRUE; 1541 1542 /* pointer to beginning of each row */ 1543 b->i[0] = 0; 1544 for (i=1; i<mbs+1; i++) { 1545 b->i[i] = b->i[i-1] + b->imax[i-1]; 1546 } 1547 1548 /* b->ilen will count nonzeros in each block row so far. */ 1549 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1550 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1551 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1552 1553 b->bs = bs; 1554 b->bs2 = bs2; 1555 b->nz = 0; 1556 b->maxnz = nz*bs2; 1557 1558 b->inew = 0; 1559 b->jnew = 0; 1560 b->anew = 0; 1561 b->a2anew = 0; 1562 b->permute = PETSC_FALSE; 1563 PetscFunctionReturn(0); 1564 } 1565 EXTERN_C_END 1566 1567 EXTERN_C_BEGIN 1568 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqAIJ(Mat,const MatType,Mat*); 1569 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqBAIJ(Mat,const MatType,Mat*); 1570 EXTERN_C_END 1571 1572 /*MC 1573 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1574 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1575 1576 Options Database Keys: 1577 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1578 1579 Level: beginner 1580 1581 .seealso: MatCreateSeqSBAIJ 1582 M*/ 1583 1584 EXTERN_C_BEGIN 1585 #undef __FUNCT__ 1586 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1587 PetscErrorCode MatCreate_SeqSBAIJ(Mat B) 1588 { 1589 Mat_SeqSBAIJ *b; 1590 PetscErrorCode ierr; 1591 int size; 1592 1593 PetscFunctionBegin; 1594 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1595 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1596 B->m = B->M = PetscMax(B->m,B->M); 1597 B->n = B->N = PetscMax(B->n,B->N); 1598 1599 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1600 B->data = (void*)b; 1601 ierr = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1602 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1603 B->ops->destroy = MatDestroy_SeqSBAIJ; 1604 B->ops->view = MatView_SeqSBAIJ; 1605 B->factor = 0; 1606 B->lupivotthreshold = 1.0; 1607 B->mapping = 0; 1608 b->row = 0; 1609 b->icol = 0; 1610 b->reallocs = 0; 1611 b->saved_values = 0; 1612 1613 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1614 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1615 1616 b->sorted = PETSC_FALSE; 1617 b->roworiented = PETSC_TRUE; 1618 b->nonew = 0; 1619 b->diag = 0; 1620 b->solve_work = 0; 1621 b->mult_work = 0; 1622 B->spptr = 0; 1623 b->keepzeroedrows = PETSC_FALSE; 1624 b->xtoy = 0; 1625 b->XtoY = 0; 1626 1627 b->inew = 0; 1628 b->jnew = 0; 1629 b->anew = 0; 1630 b->a2anew = 0; 1631 b->permute = PETSC_FALSE; 1632 1633 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1634 "MatStoreValues_SeqSBAIJ", 1635 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1636 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1637 "MatRetrieveValues_SeqSBAIJ", 1638 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1639 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1640 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1641 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1642 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C", 1643 "MatConvert_SeqSBAIJ_SeqAIJ", 1644 MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr); 1645 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C", 1646 "MatConvert_SeqSBAIJ_SeqBAIJ", 1647 MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr); 1648 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1649 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1650 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1651 1652 B->symmetric = PETSC_TRUE; 1653 B->structurally_symmetric = PETSC_TRUE; 1654 B->symmetric_set = PETSC_TRUE; 1655 B->structurally_symmetric_set = PETSC_TRUE; 1656 PetscFunctionReturn(0); 1657 } 1658 EXTERN_C_END 1659 1660 #undef __FUNCT__ 1661 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1662 /*@C 1663 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1664 compressed row) format. For good matrix assembly performance the 1665 user should preallocate the matrix storage by setting the parameter nz 1666 (or the array nnz). By setting these parameters accurately, performance 1667 during matrix assembly can be increased by more than a factor of 50. 1668 1669 Collective on Mat 1670 1671 Input Parameters: 1672 + A - the symmetric matrix 1673 . bs - size of block 1674 . nz - number of block nonzeros per block row (same for all rows) 1675 - nnz - array containing the number of block nonzeros in the upper triangular plus 1676 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1677 1678 Options Database Keys: 1679 . -mat_no_unroll - uses code that does not unroll the loops in the 1680 block calculations (much slower) 1681 . -mat_block_size - size of the blocks to use 1682 1683 Level: intermediate 1684 1685 Notes: 1686 Specify the preallocated storage with either nz or nnz (not both). 1687 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1688 allocation. For additional details, see the users manual chapter on 1689 matrices. 1690 1691 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1692 @*/ 1693 PetscErrorCode MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) { 1694 PetscErrorCode ierr,(*f)(Mat,int,int,const int[]); 1695 1696 PetscFunctionBegin; 1697 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1698 if (f) { 1699 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1700 } 1701 PetscFunctionReturn(0); 1702 } 1703 1704 #undef __FUNCT__ 1705 #define __FUNCT__ "MatCreateSeqSBAIJ" 1706 /*@C 1707 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1708 compressed row) format. For good matrix assembly performance the 1709 user should preallocate the matrix storage by setting the parameter nz 1710 (or the array nnz). By setting these parameters accurately, performance 1711 during matrix assembly can be increased by more than a factor of 50. 1712 1713 Collective on MPI_Comm 1714 1715 Input Parameters: 1716 + comm - MPI communicator, set to PETSC_COMM_SELF 1717 . bs - size of block 1718 . m - number of rows, or number of columns 1719 . nz - number of block nonzeros per block row (same for all rows) 1720 - nnz - array containing the number of block nonzeros in the upper triangular plus 1721 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1722 1723 Output Parameter: 1724 . A - the symmetric matrix 1725 1726 Options Database Keys: 1727 . -mat_no_unroll - uses code that does not unroll the loops in the 1728 block calculations (much slower) 1729 . -mat_block_size - size of the blocks to use 1730 1731 Level: intermediate 1732 1733 Notes: 1734 1735 Specify the preallocated storage with either nz or nnz (not both). 1736 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1737 allocation. For additional details, see the users manual chapter on 1738 matrices. 1739 1740 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1741 @*/ 1742 PetscErrorCode MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A) 1743 { 1744 PetscErrorCode ierr; 1745 1746 PetscFunctionBegin; 1747 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1748 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1749 ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1750 PetscFunctionReturn(0); 1751 } 1752 1753 #undef __FUNCT__ 1754 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1755 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1756 { 1757 Mat C; 1758 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1759 PetscErrorCode ierr; 1760 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1761 1762 PetscFunctionBegin; 1763 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1764 1765 *B = 0; 1766 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1767 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1768 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1769 c = (Mat_SeqSBAIJ*)C->data; 1770 1771 C->preallocated = PETSC_TRUE; 1772 C->factor = A->factor; 1773 c->row = 0; 1774 c->icol = 0; 1775 c->saved_values = 0; 1776 c->keepzeroedrows = a->keepzeroedrows; 1777 C->assembled = PETSC_TRUE; 1778 1779 C->M = A->M; 1780 C->N = A->N; 1781 c->bs = a->bs; 1782 c->bs2 = a->bs2; 1783 c->mbs = a->mbs; 1784 c->nbs = a->nbs; 1785 1786 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1787 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1788 for (i=0; i<mbs; i++) { 1789 c->imax[i] = a->imax[i]; 1790 c->ilen[i] = a->ilen[i]; 1791 } 1792 1793 /* allocate the matrix space */ 1794 c->singlemalloc = PETSC_TRUE; 1795 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1796 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1797 c->j = (int*)(c->a + nz*bs2); 1798 c->i = c->j + nz; 1799 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1800 if (mbs > 0) { 1801 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1802 if (cpvalues == MAT_COPY_VALUES) { 1803 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1804 } else { 1805 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1806 } 1807 } 1808 1809 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1810 c->sorted = a->sorted; 1811 c->roworiented = a->roworiented; 1812 c->nonew = a->nonew; 1813 1814 if (a->diag) { 1815 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1816 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1817 for (i=0; i<mbs; i++) { 1818 c->diag[i] = a->diag[i]; 1819 } 1820 } else c->diag = 0; 1821 c->nz = a->nz; 1822 c->maxnz = a->maxnz; 1823 c->solve_work = 0; 1824 c->mult_work = 0; 1825 *B = C; 1826 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1827 PetscFunctionReturn(0); 1828 } 1829 1830 #undef __FUNCT__ 1831 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1832 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer,const MatType type,Mat *A) 1833 { 1834 Mat_SeqSBAIJ *a; 1835 Mat B; 1836 PetscErrorCode ierr; 1837 int i,nz,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1838 int *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1839 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1840 int *masked,nmask,tmp,bs2,ishift; 1841 PetscScalar *aa; 1842 MPI_Comm comm = ((PetscObject)viewer)->comm; 1843 1844 PetscFunctionBegin; 1845 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1846 bs2 = bs*bs; 1847 1848 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1849 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1850 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1851 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1852 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1853 M = header[1]; N = header[2]; nz = header[3]; 1854 1855 if (header[3] < 0) { 1856 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1857 } 1858 1859 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1860 1861 /* 1862 This code adds extra rows to make sure the number of rows is 1863 divisible by the blocksize 1864 */ 1865 mbs = M/bs; 1866 extra_rows = bs - M + bs*(mbs); 1867 if (extra_rows == bs) extra_rows = 0; 1868 else mbs++; 1869 if (extra_rows) { 1870 PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"); 1871 } 1872 1873 /* read in row lengths */ 1874 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1875 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1876 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1877 1878 /* read in column indices */ 1879 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1880 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1881 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1882 1883 /* loop over row lengths determining block row lengths */ 1884 ierr = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr); 1885 ierr = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1886 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1887 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1888 masked = mask + mbs; 1889 rowcount = 0; nzcount = 0; 1890 for (i=0; i<mbs; i++) { 1891 nmask = 0; 1892 for (j=0; j<bs; j++) { 1893 kmax = rowlengths[rowcount]; 1894 for (k=0; k<kmax; k++) { 1895 tmp = jj[nzcount++]/bs; /* block col. index */ 1896 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1897 } 1898 rowcount++; 1899 } 1900 s_browlengths[i] += nmask; 1901 1902 /* zero out the mask elements we set */ 1903 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1904 } 1905 1906 /* create our matrix */ 1907 ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr); 1908 ierr = MatSetType(B,type);CHKERRQ(ierr); 1909 ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr); 1910 a = (Mat_SeqSBAIJ*)B->data; 1911 1912 /* set matrix "i" values */ 1913 a->i[0] = 0; 1914 for (i=1; i<= mbs; i++) { 1915 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1916 a->ilen[i-1] = s_browlengths[i-1]; 1917 } 1918 a->nz = a->i[mbs]; 1919 1920 /* read in nonzero values */ 1921 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1922 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1923 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1924 1925 /* set "a" and "j" values into matrix */ 1926 nzcount = 0; jcount = 0; 1927 for (i=0; i<mbs; i++) { 1928 nzcountb = nzcount; 1929 nmask = 0; 1930 for (j=0; j<bs; j++) { 1931 kmax = rowlengths[i*bs+j]; 1932 for (k=0; k<kmax; k++) { 1933 tmp = jj[nzcount++]/bs; /* block col. index */ 1934 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1935 } 1936 } 1937 /* sort the masked values */ 1938 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1939 1940 /* set "j" values into matrix */ 1941 maskcount = 1; 1942 for (j=0; j<nmask; j++) { 1943 a->j[jcount++] = masked[j]; 1944 mask[masked[j]] = maskcount++; 1945 } 1946 1947 /* set "a" values into matrix */ 1948 ishift = bs2*a->i[i]; 1949 for (j=0; j<bs; j++) { 1950 kmax = rowlengths[i*bs+j]; 1951 for (k=0; k<kmax; k++) { 1952 tmp = jj[nzcountb]/bs ; /* block col. index */ 1953 if (tmp >= i){ 1954 block = mask[tmp] - 1; 1955 point = jj[nzcountb] - bs*tmp; 1956 idx = ishift + bs2*block + j + bs*point; 1957 a->a[idx] = aa[nzcountb]; 1958 } 1959 nzcountb++; 1960 } 1961 } 1962 /* zero out the mask elements we set */ 1963 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1964 } 1965 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1966 1967 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1968 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1969 ierr = PetscFree(aa);CHKERRQ(ierr); 1970 ierr = PetscFree(jj);CHKERRQ(ierr); 1971 ierr = PetscFree(mask);CHKERRQ(ierr); 1972 1973 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1974 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1975 ierr = MatView_Private(B);CHKERRQ(ierr); 1976 *A = B; 1977 PetscFunctionReturn(0); 1978 } 1979 1980 #undef __FUNCT__ 1981 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1982 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1983 { 1984 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1985 MatScalar *aa=a->a,*v,*v1; 1986 PetscScalar *x,*b,*t,sum,d; 1987 PetscErrorCode ierr; 1988 int m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j; 1989 int nz,nz1,*vj,*vj1,i; 1990 1991 PetscFunctionBegin; 1992 its = its*lits; 1993 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1994 1995 if (bs > 1) 1996 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1997 1998 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1999 if (xx != bb) { 2000 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2001 } else { 2002 b = x; 2003 } 2004 2005 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 2006 2007 if (flag & SOR_ZERO_INITIAL_GUESS) { 2008 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2009 for (i=0; i<m; i++) 2010 t[i] = b[i]; 2011 2012 for (i=0; i<m; i++){ 2013 d = *(aa + ai[i]); /* diag[i] */ 2014 v = aa + ai[i] + 1; 2015 vj = aj + ai[i] + 1; 2016 nz = ai[i+1] - ai[i] - 1; 2017 x[i] = omega*t[i]/d; 2018 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 2019 PetscLogFlops(2*nz-1); 2020 } 2021 } 2022 2023 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2024 for (i=0; i<m; i++) 2025 t[i] = b[i]; 2026 2027 for (i=0; i<m-1; i++){ /* update rhs */ 2028 v = aa + ai[i] + 1; 2029 vj = aj + ai[i] + 1; 2030 nz = ai[i+1] - ai[i] - 1; 2031 while (nz--) t[*vj++] -= x[i]*(*v++); 2032 PetscLogFlops(2*nz-1); 2033 } 2034 for (i=m-1; i>=0; i--){ 2035 d = *(aa + ai[i]); 2036 v = aa + ai[i] + 1; 2037 vj = aj + ai[i] + 1; 2038 nz = ai[i+1] - ai[i] - 1; 2039 sum = t[i]; 2040 while (nz--) sum -= x[*vj++]*(*v++); 2041 PetscLogFlops(2*nz-1); 2042 x[i] = (1-omega)*x[i] + omega*sum/d; 2043 } 2044 } 2045 its--; 2046 } 2047 2048 while (its--) { 2049 /* 2050 forward sweep: 2051 for i=0,...,m-1: 2052 sum[i] = (b[i] - U(i,:)x )/d[i]; 2053 x[i] = (1-omega)x[i] + omega*sum[i]; 2054 b = b - x[i]*U^T(i,:); 2055 2056 */ 2057 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2058 for (i=0; i<m; i++) 2059 t[i] = b[i]; 2060 2061 for (i=0; i<m; i++){ 2062 d = *(aa + ai[i]); /* diag[i] */ 2063 v = aa + ai[i] + 1; v1=v; 2064 vj = aj + ai[i] + 1; vj1=vj; 2065 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2066 sum = t[i]; 2067 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2068 x[i] = (1-omega)*x[i] + omega*sum/d; 2069 while (nz--) t[*vj++] -= x[i]*(*v++); 2070 PetscLogFlops(4*nz-2); 2071 } 2072 } 2073 2074 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2075 /* 2076 backward sweep: 2077 b = b - x[i]*U^T(i,:), i=0,...,n-2 2078 for i=m-1,...,0: 2079 sum[i] = (b[i] - U(i,:)x )/d[i]; 2080 x[i] = (1-omega)x[i] + omega*sum[i]; 2081 */ 2082 for (i=0; i<m; i++) 2083 t[i] = b[i]; 2084 2085 for (i=0; i<m-1; i++){ /* update rhs */ 2086 v = aa + ai[i] + 1; 2087 vj = aj + ai[i] + 1; 2088 nz = ai[i+1] - ai[i] - 1; 2089 while (nz--) t[*vj++] -= x[i]*(*v++); 2090 PetscLogFlops(2*nz-1); 2091 } 2092 for (i=m-1; i>=0; i--){ 2093 d = *(aa + ai[i]); 2094 v = aa + ai[i] + 1; 2095 vj = aj + ai[i] + 1; 2096 nz = ai[i+1] - ai[i] - 1; 2097 sum = t[i]; 2098 while (nz--) sum -= x[*vj++]*(*v++); 2099 PetscLogFlops(2*nz-1); 2100 x[i] = (1-omega)*x[i] + omega*sum/d; 2101 } 2102 } 2103 } 2104 2105 ierr = PetscFree(t);CHKERRQ(ierr); 2106 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2107 if (bb != xx) { 2108 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2109 } 2110 2111 PetscFunctionReturn(0); 2112 } 2113 2114 2115 2116 2117 2118 2119