1 2 /* 3 Defines the basic matrix operations for the SBAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 7 #include "src/inline/spops.h" 8 #include "src/mat/impls/sbaij/seq/sbaij.h" 9 10 #define CHUNKSIZE 10 11 12 /* 13 Checks for missing diagonals 14 */ 15 #undef __FUNCT__ 16 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 17 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A) 18 { 19 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 20 PetscErrorCode ierr; 21 int *diag,*jj = a->j,i; 22 23 PetscFunctionBegin; 24 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 25 diag = a->diag; 26 for (i=0; i<a->mbs; i++) { 27 if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i); 28 } 29 PetscFunctionReturn(0); 30 } 31 32 #undef __FUNCT__ 33 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 34 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A) 35 { 36 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 37 PetscErrorCode ierr; 38 int i,mbs = a->mbs; 39 40 PetscFunctionBegin; 41 if (a->diag) PetscFunctionReturn(0); 42 43 ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr); 44 PetscLogObjectMemory(A,(mbs+1)*sizeof(int)); 45 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 46 PetscFunctionReturn(0); 47 } 48 49 #undef __FUNCT__ 50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 51 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 52 { 53 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 54 int n = a->mbs,i; 55 56 PetscFunctionBegin; 57 *nn = n; 58 if (!ia) PetscFunctionReturn(0); 59 60 if (oshift == 1) { 61 /* temporarily add 1 to i and j indices */ 62 int nz = a->i[n]; 63 for (i=0; i<nz; i++) a->j[i]++; 64 for (i=0; i<n+1; i++) a->i[i]++; 65 *ia = a->i; *ja = a->j; 66 } else { 67 *ia = a->i; *ja = a->j; 68 } 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 74 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 75 { 76 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 77 int i,n = a->mbs; 78 79 PetscFunctionBegin; 80 if (!ia) PetscFunctionReturn(0); 81 82 if (oshift == 1) { 83 int nz = a->i[n]-1; 84 for (i=0; i<nz; i++) a->j[i]--; 85 for (i=0; i<n+1; i++) a->i[i]--; 86 } 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ" 92 PetscErrorCode MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs) 93 { 94 Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data; 95 96 PetscFunctionBegin; 97 *bs = sbaij->bs; 98 PetscFunctionReturn(0); 99 } 100 101 #undef __FUNCT__ 102 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 103 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A) 104 { 105 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 106 PetscErrorCode ierr; 107 108 PetscFunctionBegin; 109 #if defined(PETSC_USE_LOG) 110 PetscLogObjectState((PetscObject)A,"Rows=%d, NZ=%d",A->m,a->nz); 111 #endif 112 ierr = PetscFree(a->a);CHKERRQ(ierr); 113 if (!a->singlemalloc) { 114 ierr = PetscFree(a->i);CHKERRQ(ierr); 115 ierr = PetscFree(a->j);CHKERRQ(ierr); 116 } 117 if (a->row) { 118 ierr = ISDestroy(a->row);CHKERRQ(ierr); 119 } 120 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 121 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 122 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 123 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 124 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 125 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 126 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 127 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 128 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 129 130 if (a->inew){ 131 ierr = PetscFree(a->inew);CHKERRQ(ierr); 132 a->inew = 0; 133 } 134 ierr = PetscFree(a);CHKERRQ(ierr); 135 PetscFunctionReturn(0); 136 } 137 138 #undef __FUNCT__ 139 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 140 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op) 141 { 142 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 143 144 PetscFunctionBegin; 145 switch (op) { 146 case MAT_ROW_ORIENTED: 147 a->roworiented = PETSC_TRUE; 148 break; 149 case MAT_COLUMN_ORIENTED: 150 a->roworiented = PETSC_FALSE; 151 break; 152 case MAT_COLUMNS_SORTED: 153 a->sorted = PETSC_TRUE; 154 break; 155 case MAT_COLUMNS_UNSORTED: 156 a->sorted = PETSC_FALSE; 157 break; 158 case MAT_KEEP_ZEROED_ROWS: 159 a->keepzeroedrows = PETSC_TRUE; 160 break; 161 case MAT_NO_NEW_NONZERO_LOCATIONS: 162 a->nonew = 1; 163 break; 164 case MAT_NEW_NONZERO_LOCATION_ERR: 165 a->nonew = -1; 166 break; 167 case MAT_NEW_NONZERO_ALLOCATION_ERR: 168 a->nonew = -2; 169 break; 170 case MAT_YES_NEW_NONZERO_LOCATIONS: 171 a->nonew = 0; 172 break; 173 case MAT_ROWS_SORTED: 174 case MAT_ROWS_UNSORTED: 175 case MAT_YES_NEW_DIAGONALS: 176 case MAT_IGNORE_OFF_PROC_ENTRIES: 177 case MAT_USE_HASH_TABLE: 178 PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n"); 179 break; 180 case MAT_NO_NEW_DIAGONALS: 181 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 182 case MAT_NOT_SYMMETRIC: 183 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 184 case MAT_HERMITIAN: 185 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 186 case MAT_SYMMETRIC: 187 case MAT_STRUCTURALLY_SYMMETRIC: 188 case MAT_NOT_HERMITIAN: 189 case MAT_SYMMETRY_ETERNAL: 190 case MAT_NOT_SYMMETRY_ETERNAL: 191 break; 192 default: 193 SETERRQ(PETSC_ERR_SUP,"unknown option"); 194 } 195 PetscFunctionReturn(0); 196 } 197 198 #undef __FUNCT__ 199 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 200 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v) 201 { 202 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 203 PetscErrorCode ierr; 204 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2; 205 MatScalar *aa,*aa_i; 206 PetscScalar *v_i; 207 208 PetscFunctionBegin; 209 bs = a->bs; 210 ai = a->i; 211 aj = a->j; 212 aa = a->a; 213 bs2 = a->bs2; 214 215 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %d out of range", row); 216 217 bn = row/bs; /* Block number */ 218 bp = row % bs; /* Block position */ 219 M = ai[bn+1] - ai[bn]; 220 *ncols = bs*M; 221 222 if (v) { 223 *v = 0; 224 if (*ncols) { 225 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 226 for (i=0; i<M; i++) { /* for each block in the block row */ 227 v_i = *v + i*bs; 228 aa_i = aa + bs2*(ai[bn] + i); 229 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 230 } 231 } 232 } 233 234 if (cols) { 235 *cols = 0; 236 if (*ncols) { 237 ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr); 238 for (i=0; i<M; i++) { /* for each block in the block row */ 239 cols_i = *cols + i*bs; 240 itmp = bs*aj[ai[bn] + i]; 241 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 242 } 243 } 244 } 245 246 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 247 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 248 #ifdef column_search 249 v_i = *v + M*bs; 250 cols_i = *cols + M*bs; 251 for (i=0; i<bn; i++){ /* for each block row */ 252 M = ai[i+1] - ai[i]; 253 for (j=0; j<M; j++){ 254 itmp = aj[ai[i] + j]; /* block column value */ 255 if (itmp == bn){ 256 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 257 for (k=0; k<bs; k++) { 258 *cols_i++ = i*bs+k; 259 *v_i++ = aa_i[k]; 260 } 261 *ncols += bs; 262 break; 263 } 264 } 265 } 266 #endif 267 268 PetscFunctionReturn(0); 269 } 270 271 #undef __FUNCT__ 272 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 273 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 274 { 275 PetscErrorCode ierr; 276 277 PetscFunctionBegin; 278 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 279 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 280 PetscFunctionReturn(0); 281 } 282 283 #undef __FUNCT__ 284 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 285 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B) 286 { 287 PetscErrorCode ierr; 288 PetscFunctionBegin; 289 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 290 PetscFunctionReturn(0); 291 } 292 293 #undef __FUNCT__ 294 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 295 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 296 { 297 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 298 PetscErrorCode ierr; 299 int i,j,bs = a->bs,k,l,bs2=a->bs2; 300 char *name; 301 PetscViewerFormat format; 302 303 PetscFunctionBegin; 304 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 305 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 306 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 307 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 308 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 309 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 310 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 311 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 312 for (i=0; i<a->mbs; i++) { 313 for (j=0; j<bs; j++) { 314 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 315 for (k=a->i[i]; k<a->i[i+1]; k++) { 316 for (l=0; l<bs; l++) { 317 #if defined(PETSC_USE_COMPLEX) 318 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 319 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 320 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 321 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 322 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 323 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 324 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 325 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 326 } 327 #else 328 if (a->a[bs2*k + l*bs + j] != 0.0) { 329 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 330 } 331 #endif 332 } 333 } 334 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 335 } 336 } 337 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 338 } else { 339 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 340 for (i=0; i<a->mbs; i++) { 341 for (j=0; j<bs; j++) { 342 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 343 for (k=a->i[i]; k<a->i[i+1]; k++) { 344 for (l=0; l<bs; l++) { 345 #if defined(PETSC_USE_COMPLEX) 346 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 347 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 348 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 349 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 350 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 351 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 352 } else { 353 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 354 } 355 #else 356 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 357 #endif 358 } 359 } 360 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 361 } 362 } 363 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 364 } 365 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 366 PetscFunctionReturn(0); 367 } 368 369 #undef __FUNCT__ 370 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 371 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 372 { 373 Mat A = (Mat) Aa; 374 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 375 PetscErrorCode ierr; 376 int row,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank; 377 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 378 MatScalar *aa; 379 MPI_Comm comm; 380 PetscViewer viewer; 381 382 PetscFunctionBegin; 383 /* 384 This is nasty. If this is called from an originally parallel matrix 385 then all processes call this,but only the first has the matrix so the 386 rest should return immediately. 387 */ 388 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 389 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 390 if (rank) PetscFunctionReturn(0); 391 392 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 393 394 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 395 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 396 397 /* loop over matrix elements drawing boxes */ 398 color = PETSC_DRAW_BLUE; 399 for (i=0,row=0; i<mbs; i++,row+=bs) { 400 for (j=a->i[i]; j<a->i[i+1]; j++) { 401 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 402 x_l = a->j[j]*bs; x_r = x_l + 1.0; 403 aa = a->a + j*bs2; 404 for (k=0; k<bs; k++) { 405 for (l=0; l<bs; l++) { 406 if (PetscRealPart(*aa++) >= 0.) continue; 407 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 408 } 409 } 410 } 411 } 412 color = PETSC_DRAW_CYAN; 413 for (i=0,row=0; i<mbs; i++,row+=bs) { 414 for (j=a->i[i]; j<a->i[i+1]; j++) { 415 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 416 x_l = a->j[j]*bs; x_r = x_l + 1.0; 417 aa = a->a + j*bs2; 418 for (k=0; k<bs; k++) { 419 for (l=0; l<bs; l++) { 420 if (PetscRealPart(*aa++) != 0.) continue; 421 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 422 } 423 } 424 } 425 } 426 427 color = PETSC_DRAW_RED; 428 for (i=0,row=0; i<mbs; i++,row+=bs) { 429 for (j=a->i[i]; j<a->i[i+1]; j++) { 430 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 431 x_l = a->j[j]*bs; x_r = x_l + 1.0; 432 aa = a->a + j*bs2; 433 for (k=0; k<bs; k++) { 434 for (l=0; l<bs; l++) { 435 if (PetscRealPart(*aa++) <= 0.) continue; 436 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 437 } 438 } 439 } 440 } 441 PetscFunctionReturn(0); 442 } 443 444 #undef __FUNCT__ 445 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 446 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 447 { 448 PetscErrorCode ierr; 449 PetscReal xl,yl,xr,yr,w,h; 450 PetscDraw draw; 451 PetscTruth isnull; 452 453 PetscFunctionBegin; 454 455 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 456 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 457 458 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 459 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 460 xr += w; yr += h; xl = -w; yl = -h; 461 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 462 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 463 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 464 PetscFunctionReturn(0); 465 } 466 467 #undef __FUNCT__ 468 #define __FUNCT__ "MatView_SeqSBAIJ" 469 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 470 { 471 PetscErrorCode ierr; 472 PetscTruth iascii,isdraw; 473 474 PetscFunctionBegin; 475 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 476 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 477 if (iascii){ 478 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 479 } else if (isdraw) { 480 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 481 } else { 482 Mat B; 483 ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr); 484 ierr = MatView(B,viewer);CHKERRQ(ierr); 485 ierr = MatDestroy(B);CHKERRQ(ierr); 486 } 487 PetscFunctionReturn(0); 488 } 489 490 491 #undef __FUNCT__ 492 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 493 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[]) 494 { 495 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 496 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 497 int *ai = a->i,*ailen = a->ilen; 498 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 499 MatScalar *ap,*aa = a->a,zero = 0.0; 500 501 PetscFunctionBegin; 502 for (k=0; k<m; k++) { /* loop over rows */ 503 row = im[k]; brow = row/bs; 504 if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",row); 505 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1); 506 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 507 nrow = ailen[brow]; 508 for (l=0; l<n; l++) { /* loop over columns */ 509 if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",in[l]); 510 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1); 511 col = in[l] ; 512 bcol = col/bs; 513 cidx = col%bs; 514 ridx = row%bs; 515 high = nrow; 516 low = 0; /* assume unsorted */ 517 while (high-low > 5) { 518 t = (low+high)/2; 519 if (rp[t] > bcol) high = t; 520 else low = t; 521 } 522 for (i=low; i<high; i++) { 523 if (rp[i] > bcol) break; 524 if (rp[i] == bcol) { 525 *v++ = ap[bs2*i+bs*cidx+ridx]; 526 goto finished; 527 } 528 } 529 *v++ = zero; 530 finished:; 531 } 532 } 533 PetscFunctionReturn(0); 534 } 535 536 537 #undef __FUNCT__ 538 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 539 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 540 { 541 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 542 PetscErrorCode ierr; 543 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 544 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 545 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval; 546 PetscTruth roworiented=a->roworiented; 547 const MatScalar *value = v; 548 MatScalar *ap,*aa = a->a,*bap; 549 550 PetscFunctionBegin; 551 if (roworiented) { 552 stepval = (n-1)*bs; 553 } else { 554 stepval = (m-1)*bs; 555 } 556 for (k=0; k<m; k++) { /* loop over added rows */ 557 row = im[k]; 558 if (row < 0) continue; 559 #if defined(PETSC_USE_BOPT_g) 560 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1); 561 #endif 562 rp = aj + ai[row]; 563 ap = aa + bs2*ai[row]; 564 rmax = imax[row]; 565 nrow = ailen[row]; 566 low = 0; 567 for (l=0; l<n; l++) { /* loop over added columns */ 568 if (in[l] < 0) continue; 569 col = in[l]; 570 #if defined(PETSC_USE_BOPT_g) 571 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",col,a->nbs-1); 572 #endif 573 if (col < row) continue; /* ignore lower triangular block */ 574 if (roworiented) { 575 value = v + k*(stepval+bs)*bs + l*bs; 576 } else { 577 value = v + l*(stepval+bs)*bs + k*bs; 578 } 579 if (!sorted) low = 0; high = nrow; 580 while (high-low > 7) { 581 t = (low+high)/2; 582 if (rp[t] > col) high = t; 583 else low = t; 584 } 585 for (i=low; i<high; i++) { 586 if (rp[i] > col) break; 587 if (rp[i] == col) { 588 bap = ap + bs2*i; 589 if (roworiented) { 590 if (is == ADD_VALUES) { 591 for (ii=0; ii<bs; ii++,value+=stepval) { 592 for (jj=ii; jj<bs2; jj+=bs) { 593 bap[jj] += *value++; 594 } 595 } 596 } else { 597 for (ii=0; ii<bs; ii++,value+=stepval) { 598 for (jj=ii; jj<bs2; jj+=bs) { 599 bap[jj] = *value++; 600 } 601 } 602 } 603 } else { 604 if (is == ADD_VALUES) { 605 for (ii=0; ii<bs; ii++,value+=stepval) { 606 for (jj=0; jj<bs; jj++) { 607 *bap++ += *value++; 608 } 609 } 610 } else { 611 for (ii=0; ii<bs; ii++,value+=stepval) { 612 for (jj=0; jj<bs; jj++) { 613 *bap++ = *value++; 614 } 615 } 616 } 617 } 618 goto noinsert2; 619 } 620 } 621 if (nonew == 1) goto noinsert2; 622 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 623 if (nrow >= rmax) { 624 /* there is no extra room in row, therefore enlarge */ 625 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 626 MatScalar *new_a; 627 628 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 629 630 /* malloc new storage space */ 631 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 632 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 633 new_j = (int*)(new_a + bs2*new_nz); 634 new_i = new_j + new_nz; 635 636 /* copy over old data into new slots */ 637 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 638 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 639 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 640 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 641 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 642 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 643 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 644 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 645 /* free up old matrix storage */ 646 ierr = PetscFree(a->a);CHKERRQ(ierr); 647 if (!a->singlemalloc) { 648 ierr = PetscFree(a->i);CHKERRQ(ierr); 649 ierr = PetscFree(a->j);CHKERRQ(ierr); 650 } 651 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 652 a->singlemalloc = PETSC_TRUE; 653 654 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 655 rmax = imax[row] = imax[row] + CHUNKSIZE; 656 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 657 a->maxnz += bs2*CHUNKSIZE; 658 a->reallocs++; 659 a->nz++; 660 } 661 N = nrow++ - 1; 662 /* shift up all the later entries in this row */ 663 for (ii=N; ii>=i; ii--) { 664 rp[ii+1] = rp[ii]; 665 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 666 } 667 if (N >= i) { 668 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 669 } 670 rp[i] = col; 671 bap = ap + bs2*i; 672 if (roworiented) { 673 for (ii=0; ii<bs; ii++,value+=stepval) { 674 for (jj=ii; jj<bs2; jj+=bs) { 675 bap[jj] = *value++; 676 } 677 } 678 } else { 679 for (ii=0; ii<bs; ii++,value+=stepval) { 680 for (jj=0; jj<bs; jj++) { 681 *bap++ = *value++; 682 } 683 } 684 } 685 noinsert2:; 686 low = i; 687 } 688 ailen[row] = nrow; 689 } 690 PetscFunctionReturn(0); 691 } 692 693 #undef __FUNCT__ 694 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 695 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 696 { 697 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 698 PetscErrorCode ierr; 699 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 700 int m = A->m,*ip,N,*ailen = a->ilen; 701 int mbs = a->mbs,bs2 = a->bs2,rmax = 0; 702 MatScalar *aa = a->a,*ap; 703 704 PetscFunctionBegin; 705 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 706 707 if (m) rmax = ailen[0]; 708 for (i=1; i<mbs; i++) { 709 /* move each row back by the amount of empty slots (fshift) before it*/ 710 fshift += imax[i-1] - ailen[i-1]; 711 rmax = PetscMax(rmax,ailen[i]); 712 if (fshift) { 713 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 714 N = ailen[i]; 715 for (j=0; j<N; j++) { 716 ip[j-fshift] = ip[j]; 717 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 718 } 719 } 720 ai[i] = ai[i-1] + ailen[i-1]; 721 } 722 if (mbs) { 723 fshift += imax[mbs-1] - ailen[mbs-1]; 724 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 725 } 726 /* reset ilen and imax for each row */ 727 for (i=0; i<mbs; i++) { 728 ailen[i] = imax[i] = ai[i+1] - ai[i]; 729 } 730 a->nz = ai[mbs]; 731 732 /* diagonals may have moved, reset it */ 733 if (a->diag) { 734 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr); 735 } 736 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 737 m,A->m,a->bs,fshift*bs2,a->nz*bs2); 738 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n", 739 a->reallocs); 740 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 741 a->reallocs = 0; 742 A->info.nz_unneeded = (PetscReal)fshift*bs2; 743 744 PetscFunctionReturn(0); 745 } 746 747 /* 748 This function returns an array of flags which indicate the locations of contiguous 749 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 750 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 751 Assume: sizes should be long enough to hold all the values. 752 */ 753 #undef __FUNCT__ 754 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 755 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 756 { 757 int i,j,k,row; 758 PetscTruth flg; 759 760 PetscFunctionBegin; 761 for (i=0,j=0; i<n; j++) { 762 row = idx[i]; 763 if (row%bs!=0) { /* Not the begining of a block */ 764 sizes[j] = 1; 765 i++; 766 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 767 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 768 i++; 769 } else { /* Begining of the block, so check if the complete block exists */ 770 flg = PETSC_TRUE; 771 for (k=1; k<bs; k++) { 772 if (row+k != idx[i+k]) { /* break in the block */ 773 flg = PETSC_FALSE; 774 break; 775 } 776 } 777 if (flg == PETSC_TRUE) { /* No break in the bs */ 778 sizes[j] = bs; 779 i+= bs; 780 } else { 781 sizes[j] = 1; 782 i++; 783 } 784 } 785 } 786 *bs_max = j; 787 PetscFunctionReturn(0); 788 } 789 790 #undef __FUNCT__ 791 #define __FUNCT__ "MatZeroRows_SeqSBAIJ" 792 PetscErrorCode MatZeroRows_SeqSBAIJ(Mat A,IS is,const PetscScalar *diag) 793 { 794 PetscFunctionBegin; 795 SETERRQ(PETSC_ERR_SUP,"No support for this function yet"); 796 } 797 798 /* Only add/insert a(i,j) with i<=j (blocks). 799 Any a(i,j) with i>j input by user is ingored. 800 */ 801 802 #undef __FUNCT__ 803 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 804 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 805 { 806 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 807 PetscErrorCode ierr; 808 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 809 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 810 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 811 int ridx,cidx,bs2=a->bs2; 812 MatScalar *ap,value,*aa=a->a,*bap; 813 814 PetscFunctionBegin; 815 816 for (k=0; k<m; k++) { /* loop over added rows */ 817 row = im[k]; /* row number */ 818 brow = row/bs; /* block row number */ 819 if (row < 0) continue; 820 #if defined(PETSC_USE_BOPT_g) 821 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1); 822 #endif 823 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 824 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 825 rmax = imax[brow]; /* maximum space allocated for this row */ 826 nrow = ailen[brow]; /* actual length of this row */ 827 low = 0; 828 829 for (l=0; l<n; l++) { /* loop over added columns */ 830 if (in[l] < 0) continue; 831 #if defined(PETSC_USE_BOPT_g) 832 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m-1); 833 #endif 834 col = in[l]; 835 bcol = col/bs; /* block col number */ 836 837 if (brow <= bcol){ 838 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 839 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 840 /* element value a(k,l) */ 841 if (roworiented) { 842 value = v[l + k*n]; 843 } else { 844 value = v[k + l*m]; 845 } 846 847 /* move pointer bap to a(k,l) quickly and add/insert value */ 848 if (!sorted) low = 0; high = nrow; 849 while (high-low > 7) { 850 t = (low+high)/2; 851 if (rp[t] > bcol) high = t; 852 else low = t; 853 } 854 for (i=low; i<high; i++) { 855 /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */ 856 if (rp[i] > bcol) break; 857 if (rp[i] == bcol) { 858 bap = ap + bs2*i + bs*cidx + ridx; 859 if (is == ADD_VALUES) *bap += value; 860 else *bap = value; 861 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 862 if (brow == bcol && ridx < cidx){ 863 bap = ap + bs2*i + bs*ridx + cidx; 864 if (is == ADD_VALUES) *bap += value; 865 else *bap = value; 866 } 867 goto noinsert1; 868 } 869 } 870 871 if (nonew == 1) goto noinsert1; 872 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 873 if (nrow >= rmax) { 874 /* there is no extra room in row, therefore enlarge */ 875 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 876 MatScalar *new_a; 877 878 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 879 880 /* Malloc new storage space */ 881 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 882 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 883 new_j = (int*)(new_a + bs2*new_nz); 884 new_i = new_j + new_nz; 885 886 /* copy over old data into new slots */ 887 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 888 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 889 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 890 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 891 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 892 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 893 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 894 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 895 /* free up old matrix storage */ 896 ierr = PetscFree(a->a);CHKERRQ(ierr); 897 if (!a->singlemalloc) { 898 ierr = PetscFree(a->i);CHKERRQ(ierr); 899 ierr = PetscFree(a->j);CHKERRQ(ierr); 900 } 901 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 902 a->singlemalloc = PETSC_TRUE; 903 904 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 905 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 906 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 907 a->maxnz += bs2*CHUNKSIZE; 908 a->reallocs++; 909 a->nz++; 910 } 911 912 N = nrow++ - 1; 913 /* shift up all the later entries in this row */ 914 for (ii=N; ii>=i; ii--) { 915 rp[ii+1] = rp[ii]; 916 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 917 } 918 if (N>=i) { 919 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 920 } 921 rp[i] = bcol; 922 ap[bs2*i + bs*cidx + ridx] = value; 923 noinsert1:; 924 low = i; 925 /* } */ 926 } 927 } /* end of if .. if.. */ 928 } /* end of loop over added columns */ 929 ailen[brow] = nrow; 930 } /* end of loop over added rows */ 931 932 PetscFunctionReturn(0); 933 } 934 935 EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*); 936 EXTERN PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*); 937 EXTERN PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS[],int); 938 EXTERN PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 939 EXTERN PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat*[]); 940 EXTERN PetscErrorCode MatMultTranspose_SeqSBAIJ(Mat,Vec,Vec); 941 EXTERN PetscErrorCode MatMultTransposeAdd_SeqSBAIJ(Mat,Vec,Vec,Vec); 942 EXTERN PetscErrorCode MatScale_SeqSBAIJ(const PetscScalar*,Mat); 943 EXTERN PetscErrorCode MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 944 EXTERN PetscErrorCode MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 945 EXTERN PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat,Vec); 946 EXTERN PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 947 EXTERN PetscErrorCode MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 948 EXTERN PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat); 949 EXTERN PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat,Vec); 950 951 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 952 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 953 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 954 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 955 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 956 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 957 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 958 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 959 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_7(Mat,Vec,Vec); 960 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_6(Mat,Vec,Vec); 961 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_5(Mat,Vec,Vec); 962 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_4(Mat,Vec,Vec); 963 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_3(Mat,Vec,Vec); 964 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_2(Mat,Vec,Vec); 965 EXTERN PetscErrorCode MatSolveTranspose_SeqSBAIJ_1(Mat,Vec,Vec); 966 967 EXTERN PetscErrorCode MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs); 968 969 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 970 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 971 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 972 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 973 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 974 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 975 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 976 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 977 978 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*); 979 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*); 980 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*); 981 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*); 982 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*); 983 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*); 984 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*); 985 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*); 986 EXTERN PetscErrorCode MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*); 987 988 EXTERN PetscErrorCode MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 989 EXTERN PetscErrorCode MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 990 EXTERN PetscErrorCode MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 991 EXTERN PetscErrorCode MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 992 EXTERN PetscErrorCode MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 993 EXTERN PetscErrorCode MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 994 EXTERN PetscErrorCode MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 995 EXTERN PetscErrorCode MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 996 997 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 998 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 999 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 1000 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 1001 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 1002 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 1003 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 1004 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 1005 1006 #ifdef HAVE_MatICCFactor 1007 /* modified from MatILUFactor_SeqSBAIJ, needs further work! */ 1008 #undef __FUNCT__ 1009 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 1010 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 1011 { 1012 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 1013 Mat outA; 1014 PetscErrorCode ierr; 1015 PetscTruth row_identity,col_identity; 1016 1017 PetscFunctionBegin; 1018 /* 1019 if (level != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1020 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1021 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1022 if (!row_identity || !col_identity) { 1023 SETERRQ(1,"Row and column permutations must be identity for in-place ICC"); 1024 } 1025 */ 1026 1027 outA = inA; 1028 inA->factor = FACTOR_CHOLESKY; 1029 1030 if (!a->diag) { 1031 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 1032 } 1033 /* 1034 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1035 for ILU(0) factorization with natural ordering 1036 */ 1037 switch (a->bs) { 1038 case 1: 1039 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1040 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1041 inA->ops->solves = MatSolves_SeqSBAIJ_1; 1042 PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1043 case 2: 1044 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1045 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1046 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1047 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1048 break; 1049 case 3: 1050 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1051 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1052 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1053 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1054 break; 1055 case 4: 1056 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1057 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1058 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1059 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1060 break; 1061 case 5: 1062 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1063 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1064 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1065 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1066 break; 1067 case 6: 1068 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1069 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1070 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1071 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1072 break; 1073 case 7: 1074 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1075 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1076 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1077 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1078 break; 1079 default: 1080 a->row = row; 1081 a->icol = col; 1082 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1083 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1084 1085 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1086 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1087 PetscLogObjectParent(inA,a->icol); 1088 1089 if (!a->solve_work) { 1090 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1091 PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar)); 1092 } 1093 } 1094 1095 ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr); 1096 1097 PetscFunctionReturn(0); 1098 } 1099 #endif 1100 1101 #undef __FUNCT__ 1102 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1103 PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat A) 1104 { 1105 static PetscTruth called = PETSC_FALSE; 1106 MPI_Comm comm = A->comm; 1107 PetscErrorCode ierr; 1108 1109 PetscFunctionBegin; 1110 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1111 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1112 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1113 PetscFunctionReturn(0); 1114 } 1115 1116 EXTERN_C_BEGIN 1117 #undef __FUNCT__ 1118 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1119 PetscErrorCode MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices) 1120 { 1121 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1122 int i,nz,n; 1123 1124 PetscFunctionBegin; 1125 nz = baij->maxnz; 1126 n = mat->n; 1127 for (i=0; i<nz; i++) { 1128 baij->j[i] = indices[i]; 1129 } 1130 baij->nz = nz; 1131 for (i=0; i<n; i++) { 1132 baij->ilen[i] = baij->imax[i]; 1133 } 1134 1135 PetscFunctionReturn(0); 1136 } 1137 EXTERN_C_END 1138 1139 #undef __FUNCT__ 1140 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1141 /*@ 1142 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1143 in the matrix. 1144 1145 Input Parameters: 1146 + mat - the SeqSBAIJ matrix 1147 - indices - the column indices 1148 1149 Level: advanced 1150 1151 Notes: 1152 This can be called if you have precomputed the nonzero structure of the 1153 matrix and want to provide it to the matrix object to improve the performance 1154 of the MatSetValues() operation. 1155 1156 You MUST have set the correct numbers of nonzeros per row in the call to 1157 MatCreateSeqSBAIJ(). 1158 1159 MUST be called before any calls to MatSetValues() 1160 1161 .seealso: MatCreateSeqSBAIJ 1162 @*/ 1163 PetscErrorCode MatSeqSBAIJSetColumnIndices(Mat mat,int *indices) 1164 { 1165 PetscErrorCode ierr,(*f)(Mat,int *); 1166 1167 PetscFunctionBegin; 1168 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1169 PetscValidPointer(indices,2); 1170 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1171 if (f) { 1172 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1173 } else { 1174 SETERRQ(1,"Wrong type of matrix to set column indices"); 1175 } 1176 PetscFunctionReturn(0); 1177 } 1178 1179 #undef __FUNCT__ 1180 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1181 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A) 1182 { 1183 PetscErrorCode ierr; 1184 1185 PetscFunctionBegin; 1186 ierr = MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1187 PetscFunctionReturn(0); 1188 } 1189 1190 #undef __FUNCT__ 1191 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1192 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1193 { 1194 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1195 PetscFunctionBegin; 1196 *array = a->a; 1197 PetscFunctionReturn(0); 1198 } 1199 1200 #undef __FUNCT__ 1201 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1202 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1203 { 1204 PetscFunctionBegin; 1205 PetscFunctionReturn(0); 1206 } 1207 1208 #include "petscblaslapack.h" 1209 #undef __FUNCT__ 1210 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1211 PetscErrorCode MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1212 { 1213 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1214 PetscErrorCode ierr; 1215 int i,bs=y->bs,bs2,j; 1216 PetscBLASInt bnz = (PetscBLASInt)x->nz,one = 1; 1217 1218 PetscFunctionBegin; 1219 if (str == SAME_NONZERO_PATTERN) { 1220 BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1221 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1222 if (y->xtoy && y->XtoY != X) { 1223 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1224 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1225 } 1226 if (!y->xtoy) { /* get xtoy */ 1227 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1228 y->XtoY = X; 1229 } 1230 bs2 = bs*bs; 1231 for (i=0; i<x->nz; i++) { 1232 j = 0; 1233 while (j < bs2){ 1234 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1235 j++; 1236 } 1237 } 1238 PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)); 1239 } else { 1240 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1241 } 1242 PetscFunctionReturn(0); 1243 } 1244 1245 #undef __FUNCT__ 1246 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1247 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1248 { 1249 PetscFunctionBegin; 1250 *flg = PETSC_TRUE; 1251 PetscFunctionReturn(0); 1252 } 1253 1254 #undef __FUNCT__ 1255 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1256 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1257 { 1258 PetscFunctionBegin; 1259 *flg = PETSC_TRUE; 1260 PetscFunctionReturn(0); 1261 } 1262 1263 #undef __FUNCT__ 1264 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1265 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg) 1266 { 1267 PetscFunctionBegin; 1268 *flg = PETSC_FALSE; 1269 PetscFunctionReturn(0); 1270 } 1271 1272 /* -------------------------------------------------------------------*/ 1273 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1274 MatGetRow_SeqSBAIJ, 1275 MatRestoreRow_SeqSBAIJ, 1276 MatMult_SeqSBAIJ_N, 1277 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1278 MatMultTranspose_SeqSBAIJ, 1279 MatMultTransposeAdd_SeqSBAIJ, 1280 MatSolve_SeqSBAIJ_N, 1281 0, 1282 0, 1283 /*10*/ 0, 1284 0, 1285 MatCholeskyFactor_SeqSBAIJ, 1286 MatRelax_SeqSBAIJ, 1287 MatTranspose_SeqSBAIJ, 1288 /*15*/ MatGetInfo_SeqSBAIJ, 1289 MatEqual_SeqSBAIJ, 1290 MatGetDiagonal_SeqSBAIJ, 1291 MatDiagonalScale_SeqSBAIJ, 1292 MatNorm_SeqSBAIJ, 1293 /*20*/ 0, 1294 MatAssemblyEnd_SeqSBAIJ, 1295 0, 1296 MatSetOption_SeqSBAIJ, 1297 MatZeroEntries_SeqSBAIJ, 1298 /*25*/ MatZeroRows_SeqSBAIJ, 1299 0, 1300 0, 1301 MatCholeskyFactorSymbolic_SeqSBAIJ, 1302 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1303 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1304 0, 1305 MatICCFactorSymbolic_SeqSBAIJ, 1306 MatGetArray_SeqSBAIJ, 1307 MatRestoreArray_SeqSBAIJ, 1308 /*35*/ MatDuplicate_SeqSBAIJ, 1309 0, 1310 0, 1311 0, 1312 0, 1313 /*40*/ MatAXPY_SeqSBAIJ, 1314 MatGetSubMatrices_SeqSBAIJ, 1315 MatIncreaseOverlap_SeqSBAIJ, 1316 MatGetValues_SeqSBAIJ, 1317 0, 1318 /*45*/ MatPrintHelp_SeqSBAIJ, 1319 MatScale_SeqSBAIJ, 1320 0, 1321 0, 1322 0, 1323 /*50*/ MatGetBlockSize_SeqSBAIJ, 1324 MatGetRowIJ_SeqSBAIJ, 1325 MatRestoreRowIJ_SeqSBAIJ, 1326 0, 1327 0, 1328 /*55*/ 0, 1329 0, 1330 0, 1331 0, 1332 MatSetValuesBlocked_SeqSBAIJ, 1333 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1334 0, 1335 0, 1336 MatGetPetscMaps_Petsc, 1337 0, 1338 /*65*/ 0, 1339 0, 1340 0, 1341 0, 1342 0, 1343 /*70*/ MatGetRowMax_SeqSBAIJ, 1344 0, 1345 0, 1346 0, 1347 0, 1348 /*75*/ 0, 1349 0, 1350 0, 1351 0, 1352 0, 1353 /*80*/ 0, 1354 0, 1355 0, 1356 #if !defined(PETSC_USE_COMPLEX) 1357 MatGetInertia_SeqSBAIJ, 1358 #else 1359 0, 1360 #endif 1361 MatLoad_SeqSBAIJ, 1362 /*85*/ MatIsSymmetric_SeqSBAIJ, 1363 MatIsHermitian_SeqSBAIJ, 1364 MatIsStructurallySymmetric_SeqSBAIJ, 1365 0, 1366 0, 1367 /*90*/ 0, 1368 0, 1369 0, 1370 0, 1371 0, 1372 /*95*/ 0, 1373 0, 1374 0, 1375 0}; 1376 1377 EXTERN_C_BEGIN 1378 #undef __FUNCT__ 1379 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1380 PetscErrorCode MatStoreValues_SeqSBAIJ(Mat mat) 1381 { 1382 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1383 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1384 PetscErrorCode ierr; 1385 1386 PetscFunctionBegin; 1387 if (aij->nonew != 1) { 1388 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1389 } 1390 1391 /* allocate space for values if not already there */ 1392 if (!aij->saved_values) { 1393 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1394 } 1395 1396 /* copy values over */ 1397 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1398 PetscFunctionReturn(0); 1399 } 1400 EXTERN_C_END 1401 1402 EXTERN_C_BEGIN 1403 #undef __FUNCT__ 1404 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1405 PetscErrorCode MatRetrieveValues_SeqSBAIJ(Mat mat) 1406 { 1407 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1408 PetscErrorCode ierr; 1409 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1410 1411 PetscFunctionBegin; 1412 if (aij->nonew != 1) { 1413 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1414 } 1415 if (!aij->saved_values) { 1416 SETERRQ(1,"Must call MatStoreValues(A);first"); 1417 } 1418 1419 /* copy values over */ 1420 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1421 PetscFunctionReturn(0); 1422 } 1423 EXTERN_C_END 1424 1425 EXTERN_C_BEGIN 1426 #undef __FUNCT__ 1427 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1428 PetscErrorCode MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,int bs,int nz,int *nnz) 1429 { 1430 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1431 PetscErrorCode ierr; 1432 int i,len,mbs,bs2; 1433 PetscTruth flg; 1434 1435 PetscFunctionBegin; 1436 B->preallocated = PETSC_TRUE; 1437 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1438 mbs = B->m/bs; 1439 bs2 = bs*bs; 1440 1441 if (mbs*bs != B->m) { 1442 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1443 } 1444 1445 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1446 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1447 if (nnz) { 1448 for (i=0; i<mbs; i++) { 1449 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1450 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs); 1451 } 1452 } 1453 1454 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1455 if (!flg) { 1456 switch (bs) { 1457 case 1: 1458 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1459 B->ops->solve = MatSolve_SeqSBAIJ_1; 1460 B->ops->solves = MatSolves_SeqSBAIJ_1; 1461 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_1; 1462 B->ops->mult = MatMult_SeqSBAIJ_1; 1463 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1464 break; 1465 case 2: 1466 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1467 B->ops->solve = MatSolve_SeqSBAIJ_2; 1468 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_2; 1469 B->ops->mult = MatMult_SeqSBAIJ_2; 1470 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1471 break; 1472 case 3: 1473 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1474 B->ops->solve = MatSolve_SeqSBAIJ_3; 1475 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_3; 1476 B->ops->mult = MatMult_SeqSBAIJ_3; 1477 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1478 break; 1479 case 4: 1480 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1481 B->ops->solve = MatSolve_SeqSBAIJ_4; 1482 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_4; 1483 B->ops->mult = MatMult_SeqSBAIJ_4; 1484 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1485 break; 1486 case 5: 1487 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1488 B->ops->solve = MatSolve_SeqSBAIJ_5; 1489 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_5; 1490 B->ops->mult = MatMult_SeqSBAIJ_5; 1491 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1492 break; 1493 case 6: 1494 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1495 B->ops->solve = MatSolve_SeqSBAIJ_6; 1496 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_6; 1497 B->ops->mult = MatMult_SeqSBAIJ_6; 1498 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1499 break; 1500 case 7: 1501 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1502 B->ops->solve = MatSolve_SeqSBAIJ_7; 1503 B->ops->solvetranspose = MatSolveTranspose_SeqSBAIJ_7; 1504 B->ops->mult = MatMult_SeqSBAIJ_7; 1505 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1506 break; 1507 } 1508 } 1509 1510 b->mbs = mbs; 1511 b->nbs = mbs; 1512 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1513 if (!nnz) { 1514 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1515 else if (nz <= 0) nz = 1; 1516 for (i=0; i<mbs; i++) { 1517 b->imax[i] = nz; 1518 } 1519 nz = nz*mbs; /* total nz */ 1520 } else { 1521 nz = 0; 1522 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1523 } 1524 /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1525 1526 /* allocate the matrix space */ 1527 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1528 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1529 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1530 b->j = (int*)(b->a + nz*bs2); 1531 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 1532 b->i = b->j + nz; 1533 b->singlemalloc = PETSC_TRUE; 1534 1535 /* pointer to beginning of each row */ 1536 b->i[0] = 0; 1537 for (i=1; i<mbs+1; i++) { 1538 b->i[i] = b->i[i-1] + b->imax[i-1]; 1539 } 1540 1541 /* b->ilen will count nonzeros in each block row so far. */ 1542 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1543 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1544 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1545 1546 b->bs = bs; 1547 b->bs2 = bs2; 1548 b->nz = 0; 1549 b->maxnz = nz*bs2; 1550 1551 b->inew = 0; 1552 b->jnew = 0; 1553 b->anew = 0; 1554 b->a2anew = 0; 1555 b->permute = PETSC_FALSE; 1556 PetscFunctionReturn(0); 1557 } 1558 EXTERN_C_END 1559 1560 EXTERN_C_BEGIN 1561 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqAIJ(Mat,const MatType,Mat*); 1562 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqBAIJ(Mat,const MatType,Mat*); 1563 EXTERN_C_END 1564 1565 /*MC 1566 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1567 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1568 1569 Options Database Keys: 1570 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1571 1572 Level: beginner 1573 1574 .seealso: MatCreateSeqSBAIJ 1575 M*/ 1576 1577 EXTERN_C_BEGIN 1578 #undef __FUNCT__ 1579 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1580 PetscErrorCode MatCreate_SeqSBAIJ(Mat B) 1581 { 1582 Mat_SeqSBAIJ *b; 1583 PetscErrorCode ierr; 1584 int size; 1585 1586 PetscFunctionBegin; 1587 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1588 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1589 B->m = B->M = PetscMax(B->m,B->M); 1590 B->n = B->N = PetscMax(B->n,B->N); 1591 1592 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1593 B->data = (void*)b; 1594 ierr = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1595 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1596 B->ops->destroy = MatDestroy_SeqSBAIJ; 1597 B->ops->view = MatView_SeqSBAIJ; 1598 B->factor = 0; 1599 B->lupivotthreshold = 1.0; 1600 B->mapping = 0; 1601 b->row = 0; 1602 b->icol = 0; 1603 b->reallocs = 0; 1604 b->saved_values = 0; 1605 1606 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1607 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1608 1609 b->sorted = PETSC_FALSE; 1610 b->roworiented = PETSC_TRUE; 1611 b->nonew = 0; 1612 b->diag = 0; 1613 b->solve_work = 0; 1614 b->mult_work = 0; 1615 B->spptr = 0; 1616 b->keepzeroedrows = PETSC_FALSE; 1617 b->xtoy = 0; 1618 b->XtoY = 0; 1619 1620 b->inew = 0; 1621 b->jnew = 0; 1622 b->anew = 0; 1623 b->a2anew = 0; 1624 b->permute = PETSC_FALSE; 1625 1626 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1627 "MatStoreValues_SeqSBAIJ", 1628 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1629 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1630 "MatRetrieveValues_SeqSBAIJ", 1631 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1632 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1633 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1634 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1635 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C", 1636 "MatConvert_SeqSBAIJ_SeqAIJ", 1637 MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr); 1638 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C", 1639 "MatConvert_SeqSBAIJ_SeqBAIJ", 1640 MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr); 1641 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1642 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1643 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1644 1645 B->symmetric = PETSC_TRUE; 1646 B->structurally_symmetric = PETSC_TRUE; 1647 B->symmetric_set = PETSC_TRUE; 1648 B->structurally_symmetric_set = PETSC_TRUE; 1649 PetscFunctionReturn(0); 1650 } 1651 EXTERN_C_END 1652 1653 #undef __FUNCT__ 1654 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1655 /*@C 1656 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1657 compressed row) format. For good matrix assembly performance the 1658 user should preallocate the matrix storage by setting the parameter nz 1659 (or the array nnz). By setting these parameters accurately, performance 1660 during matrix assembly can be increased by more than a factor of 50. 1661 1662 Collective on Mat 1663 1664 Input Parameters: 1665 + A - the symmetric matrix 1666 . bs - size of block 1667 . nz - number of block nonzeros per block row (same for all rows) 1668 - nnz - array containing the number of block nonzeros in the upper triangular plus 1669 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1670 1671 Options Database Keys: 1672 . -mat_no_unroll - uses code that does not unroll the loops in the 1673 block calculations (much slower) 1674 . -mat_block_size - size of the blocks to use 1675 1676 Level: intermediate 1677 1678 Notes: 1679 Specify the preallocated storage with either nz or nnz (not both). 1680 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1681 allocation. For additional details, see the users manual chapter on 1682 matrices. 1683 1684 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1685 @*/ 1686 PetscErrorCode MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) { 1687 PetscErrorCode ierr,(*f)(Mat,int,int,const int[]); 1688 1689 PetscFunctionBegin; 1690 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1691 if (f) { 1692 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1693 } 1694 PetscFunctionReturn(0); 1695 } 1696 1697 #undef __FUNCT__ 1698 #define __FUNCT__ "MatCreateSeqSBAIJ" 1699 /*@C 1700 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1701 compressed row) format. For good matrix assembly performance the 1702 user should preallocate the matrix storage by setting the parameter nz 1703 (or the array nnz). By setting these parameters accurately, performance 1704 during matrix assembly can be increased by more than a factor of 50. 1705 1706 Collective on MPI_Comm 1707 1708 Input Parameters: 1709 + comm - MPI communicator, set to PETSC_COMM_SELF 1710 . bs - size of block 1711 . m - number of rows, or number of columns 1712 . nz - number of block nonzeros per block row (same for all rows) 1713 - nnz - array containing the number of block nonzeros in the upper triangular plus 1714 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1715 1716 Output Parameter: 1717 . A - the symmetric matrix 1718 1719 Options Database Keys: 1720 . -mat_no_unroll - uses code that does not unroll the loops in the 1721 block calculations (much slower) 1722 . -mat_block_size - size of the blocks to use 1723 1724 Level: intermediate 1725 1726 Notes: 1727 1728 Specify the preallocated storage with either nz or nnz (not both). 1729 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1730 allocation. For additional details, see the users manual chapter on 1731 matrices. 1732 1733 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1734 @*/ 1735 PetscErrorCode MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A) 1736 { 1737 PetscErrorCode ierr; 1738 1739 PetscFunctionBegin; 1740 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1741 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1742 ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1743 PetscFunctionReturn(0); 1744 } 1745 1746 #undef __FUNCT__ 1747 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1748 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1749 { 1750 Mat C; 1751 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1752 PetscErrorCode ierr; 1753 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1754 1755 PetscFunctionBegin; 1756 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1757 1758 *B = 0; 1759 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1760 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1761 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1762 c = (Mat_SeqSBAIJ*)C->data; 1763 1764 C->preallocated = PETSC_TRUE; 1765 C->factor = A->factor; 1766 c->row = 0; 1767 c->icol = 0; 1768 c->saved_values = 0; 1769 c->keepzeroedrows = a->keepzeroedrows; 1770 C->assembled = PETSC_TRUE; 1771 1772 C->M = A->M; 1773 C->N = A->N; 1774 c->bs = a->bs; 1775 c->bs2 = a->bs2; 1776 c->mbs = a->mbs; 1777 c->nbs = a->nbs; 1778 1779 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1780 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1781 for (i=0; i<mbs; i++) { 1782 c->imax[i] = a->imax[i]; 1783 c->ilen[i] = a->ilen[i]; 1784 } 1785 1786 /* allocate the matrix space */ 1787 c->singlemalloc = PETSC_TRUE; 1788 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1789 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1790 c->j = (int*)(c->a + nz*bs2); 1791 c->i = c->j + nz; 1792 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1793 if (mbs > 0) { 1794 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1795 if (cpvalues == MAT_COPY_VALUES) { 1796 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1797 } else { 1798 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1799 } 1800 } 1801 1802 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1803 c->sorted = a->sorted; 1804 c->roworiented = a->roworiented; 1805 c->nonew = a->nonew; 1806 1807 if (a->diag) { 1808 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1809 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1810 for (i=0; i<mbs; i++) { 1811 c->diag[i] = a->diag[i]; 1812 } 1813 } else c->diag = 0; 1814 c->nz = a->nz; 1815 c->maxnz = a->maxnz; 1816 c->solve_work = 0; 1817 c->mult_work = 0; 1818 *B = C; 1819 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1820 PetscFunctionReturn(0); 1821 } 1822 1823 #undef __FUNCT__ 1824 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1825 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer,const MatType type,Mat *A) 1826 { 1827 Mat_SeqSBAIJ *a; 1828 Mat B; 1829 PetscErrorCode ierr; 1830 int i,nz,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1831 int *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1832 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1833 int *masked,nmask,tmp,bs2,ishift; 1834 PetscScalar *aa; 1835 MPI_Comm comm = ((PetscObject)viewer)->comm; 1836 1837 PetscFunctionBegin; 1838 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1839 bs2 = bs*bs; 1840 1841 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1842 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1843 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1844 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1845 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1846 M = header[1]; N = header[2]; nz = header[3]; 1847 1848 if (header[3] < 0) { 1849 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1850 } 1851 1852 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1853 1854 /* 1855 This code adds extra rows to make sure the number of rows is 1856 divisible by the blocksize 1857 */ 1858 mbs = M/bs; 1859 extra_rows = bs - M + bs*(mbs); 1860 if (extra_rows == bs) extra_rows = 0; 1861 else mbs++; 1862 if (extra_rows) { 1863 PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"); 1864 } 1865 1866 /* read in row lengths */ 1867 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1868 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1869 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1870 1871 /* read in column indices */ 1872 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1873 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1874 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1875 1876 /* loop over row lengths determining block row lengths */ 1877 ierr = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr); 1878 ierr = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1879 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1880 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1881 masked = mask + mbs; 1882 rowcount = 0; nzcount = 0; 1883 for (i=0; i<mbs; i++) { 1884 nmask = 0; 1885 for (j=0; j<bs; j++) { 1886 kmax = rowlengths[rowcount]; 1887 for (k=0; k<kmax; k++) { 1888 tmp = jj[nzcount++]/bs; /* block col. index */ 1889 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1890 } 1891 rowcount++; 1892 } 1893 s_browlengths[i] += nmask; 1894 1895 /* zero out the mask elements we set */ 1896 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1897 } 1898 1899 /* create our matrix */ 1900 ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr); 1901 ierr = MatSetType(B,type);CHKERRQ(ierr); 1902 ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr); 1903 a = (Mat_SeqSBAIJ*)B->data; 1904 1905 /* set matrix "i" values */ 1906 a->i[0] = 0; 1907 for (i=1; i<= mbs; i++) { 1908 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1909 a->ilen[i-1] = s_browlengths[i-1]; 1910 } 1911 a->nz = a->i[mbs]; 1912 1913 /* read in nonzero values */ 1914 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1915 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1916 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1917 1918 /* set "a" and "j" values into matrix */ 1919 nzcount = 0; jcount = 0; 1920 for (i=0; i<mbs; i++) { 1921 nzcountb = nzcount; 1922 nmask = 0; 1923 for (j=0; j<bs; j++) { 1924 kmax = rowlengths[i*bs+j]; 1925 for (k=0; k<kmax; k++) { 1926 tmp = jj[nzcount++]/bs; /* block col. index */ 1927 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1928 } 1929 } 1930 /* sort the masked values */ 1931 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1932 1933 /* set "j" values into matrix */ 1934 maskcount = 1; 1935 for (j=0; j<nmask; j++) { 1936 a->j[jcount++] = masked[j]; 1937 mask[masked[j]] = maskcount++; 1938 } 1939 1940 /* set "a" values into matrix */ 1941 ishift = bs2*a->i[i]; 1942 for (j=0; j<bs; j++) { 1943 kmax = rowlengths[i*bs+j]; 1944 for (k=0; k<kmax; k++) { 1945 tmp = jj[nzcountb]/bs ; /* block col. index */ 1946 if (tmp >= i){ 1947 block = mask[tmp] - 1; 1948 point = jj[nzcountb] - bs*tmp; 1949 idx = ishift + bs2*block + j + bs*point; 1950 a->a[idx] = aa[nzcountb]; 1951 } 1952 nzcountb++; 1953 } 1954 } 1955 /* zero out the mask elements we set */ 1956 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1957 } 1958 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1959 1960 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1961 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1962 ierr = PetscFree(aa);CHKERRQ(ierr); 1963 ierr = PetscFree(jj);CHKERRQ(ierr); 1964 ierr = PetscFree(mask);CHKERRQ(ierr); 1965 1966 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1967 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1968 ierr = MatView_Private(B);CHKERRQ(ierr); 1969 *A = B; 1970 PetscFunctionReturn(0); 1971 } 1972 1973 #undef __FUNCT__ 1974 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1975 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1976 { 1977 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1978 MatScalar *aa=a->a,*v,*v1; 1979 PetscScalar *x,*b,*t,sum,d; 1980 PetscErrorCode ierr; 1981 int m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j; 1982 int nz,nz1,*vj,*vj1,i; 1983 1984 PetscFunctionBegin; 1985 its = its*lits; 1986 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1987 1988 if (bs > 1) 1989 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1990 1991 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1992 if (xx != bb) { 1993 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1994 } else { 1995 b = x; 1996 } 1997 1998 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1999 2000 if (flag & SOR_ZERO_INITIAL_GUESS) { 2001 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2002 for (i=0; i<m; i++) 2003 t[i] = b[i]; 2004 2005 for (i=0; i<m; i++){ 2006 d = *(aa + ai[i]); /* diag[i] */ 2007 v = aa + ai[i] + 1; 2008 vj = aj + ai[i] + 1; 2009 nz = ai[i+1] - ai[i] - 1; 2010 x[i] = omega*t[i]/d; 2011 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 2012 PetscLogFlops(2*nz-1); 2013 } 2014 } 2015 2016 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2017 for (i=0; i<m; i++) 2018 t[i] = b[i]; 2019 2020 for (i=0; i<m-1; i++){ /* update rhs */ 2021 v = aa + ai[i] + 1; 2022 vj = aj + ai[i] + 1; 2023 nz = ai[i+1] - ai[i] - 1; 2024 while (nz--) t[*vj++] -= x[i]*(*v++); 2025 PetscLogFlops(2*nz-1); 2026 } 2027 for (i=m-1; i>=0; i--){ 2028 d = *(aa + ai[i]); 2029 v = aa + ai[i] + 1; 2030 vj = aj + ai[i] + 1; 2031 nz = ai[i+1] - ai[i] - 1; 2032 sum = t[i]; 2033 while (nz--) sum -= x[*vj++]*(*v++); 2034 PetscLogFlops(2*nz-1); 2035 x[i] = (1-omega)*x[i] + omega*sum/d; 2036 } 2037 } 2038 its--; 2039 } 2040 2041 while (its--) { 2042 /* 2043 forward sweep: 2044 for i=0,...,m-1: 2045 sum[i] = (b[i] - U(i,:)x )/d[i]; 2046 x[i] = (1-omega)x[i] + omega*sum[i]; 2047 b = b - x[i]*U^T(i,:); 2048 2049 */ 2050 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2051 for (i=0; i<m; i++) 2052 t[i] = b[i]; 2053 2054 for (i=0; i<m; i++){ 2055 d = *(aa + ai[i]); /* diag[i] */ 2056 v = aa + ai[i] + 1; v1=v; 2057 vj = aj + ai[i] + 1; vj1=vj; 2058 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2059 sum = t[i]; 2060 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2061 x[i] = (1-omega)*x[i] + omega*sum/d; 2062 while (nz--) t[*vj++] -= x[i]*(*v++); 2063 PetscLogFlops(4*nz-2); 2064 } 2065 } 2066 2067 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2068 /* 2069 backward sweep: 2070 b = b - x[i]*U^T(i,:), i=0,...,n-2 2071 for i=m-1,...,0: 2072 sum[i] = (b[i] - U(i,:)x )/d[i]; 2073 x[i] = (1-omega)x[i] + omega*sum[i]; 2074 */ 2075 for (i=0; i<m; i++) 2076 t[i] = b[i]; 2077 2078 for (i=0; i<m-1; i++){ /* update rhs */ 2079 v = aa + ai[i] + 1; 2080 vj = aj + ai[i] + 1; 2081 nz = ai[i+1] - ai[i] - 1; 2082 while (nz--) t[*vj++] -= x[i]*(*v++); 2083 PetscLogFlops(2*nz-1); 2084 } 2085 for (i=m-1; i>=0; i--){ 2086 d = *(aa + ai[i]); 2087 v = aa + ai[i] + 1; 2088 vj = aj + ai[i] + 1; 2089 nz = ai[i+1] - ai[i] - 1; 2090 sum = t[i]; 2091 while (nz--) sum -= x[*vj++]*(*v++); 2092 PetscLogFlops(2*nz-1); 2093 x[i] = (1-omega)*x[i] + omega*sum/d; 2094 } 2095 } 2096 } 2097 2098 ierr = PetscFree(t);CHKERRQ(ierr); 2099 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2100 if (bb != xx) { 2101 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2102 } 2103 2104 PetscFunctionReturn(0); 2105 } 2106 2107 2108 2109 2110 2111 2112