1 2 /* 3 Defines the basic matrix operations for the SBAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 7 #include "src/inline/spops.h" 8 #include "src/mat/impls/sbaij/seq/sbaij.h" 9 10 #define CHUNKSIZE 10 11 12 /* 13 Checks for missing diagonals 14 */ 15 #undef __FUNCT__ 16 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 17 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A) 18 { 19 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 20 PetscErrorCode ierr; 21 int *diag,*jj = a->j,i; 22 23 PetscFunctionBegin; 24 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 25 diag = a->diag; 26 for (i=0; i<a->mbs; i++) { 27 if (jj[diag[i]] != i) SETERRQ1(1,"Matrix is missing diagonal number %d",i); 28 } 29 PetscFunctionReturn(0); 30 } 31 32 #undef __FUNCT__ 33 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 34 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A) 35 { 36 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 37 PetscErrorCode ierr; 38 int i,mbs = a->mbs; 39 40 PetscFunctionBegin; 41 if (a->diag) PetscFunctionReturn(0); 42 43 ierr = PetscMalloc((mbs+1)*sizeof(int),&a->diag);CHKERRQ(ierr); 44 PetscLogObjectMemory(A,(mbs+1)*sizeof(int)); 45 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 46 PetscFunctionReturn(0); 47 } 48 49 #undef __FUNCT__ 50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 51 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 52 { 53 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 54 int n = a->mbs,i; 55 56 PetscFunctionBegin; 57 *nn = n; 58 if (!ia) PetscFunctionReturn(0); 59 60 if (oshift == 1) { 61 /* temporarily add 1 to i and j indices */ 62 int nz = a->i[n]; 63 for (i=0; i<nz; i++) a->j[i]++; 64 for (i=0; i<n+1; i++) a->i[i]++; 65 *ia = a->i; *ja = a->j; 66 } else { 67 *ia = a->i; *ja = a->j; 68 } 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 74 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done) 75 { 76 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 77 int i,n = a->mbs; 78 79 PetscFunctionBegin; 80 if (!ia) PetscFunctionReturn(0); 81 82 if (oshift == 1) { 83 int nz = a->i[n]-1; 84 for (i=0; i<nz; i++) a->j[i]--; 85 for (i=0; i<n+1; i++) a->i[i]--; 86 } 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatGetBlockSize_SeqSBAIJ" 92 PetscErrorCode MatGetBlockSize_SeqSBAIJ(Mat mat,int *bs) 93 { 94 Mat_SeqSBAIJ *sbaij = (Mat_SeqSBAIJ*)mat->data; 95 96 PetscFunctionBegin; 97 *bs = sbaij->bs; 98 PetscFunctionReturn(0); 99 } 100 101 #undef __FUNCT__ 102 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 103 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A) 104 { 105 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 106 PetscErrorCode ierr; 107 108 PetscFunctionBegin; 109 #if defined(PETSC_USE_LOG) 110 PetscLogObjectState((PetscObject)A,"Rows=%d, NZ=%d",A->m,a->nz); 111 #endif 112 ierr = PetscFree(a->a);CHKERRQ(ierr); 113 if (!a->singlemalloc) { 114 ierr = PetscFree(a->i);CHKERRQ(ierr); 115 ierr = PetscFree(a->j);CHKERRQ(ierr); 116 } 117 if (a->row) { 118 ierr = ISDestroy(a->row);CHKERRQ(ierr); 119 } 120 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 121 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 122 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 123 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 124 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 125 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 126 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 127 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 128 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 129 130 if (a->inew){ 131 ierr = PetscFree(a->inew);CHKERRQ(ierr); 132 a->inew = 0; 133 } 134 ierr = PetscFree(a);CHKERRQ(ierr); 135 136 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 137 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 138 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 139 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 140 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr); 141 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 142 PetscFunctionReturn(0); 143 } 144 145 #undef __FUNCT__ 146 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 147 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op) 148 { 149 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 150 151 PetscFunctionBegin; 152 switch (op) { 153 case MAT_ROW_ORIENTED: 154 a->roworiented = PETSC_TRUE; 155 break; 156 case MAT_COLUMN_ORIENTED: 157 a->roworiented = PETSC_FALSE; 158 break; 159 case MAT_COLUMNS_SORTED: 160 a->sorted = PETSC_TRUE; 161 break; 162 case MAT_COLUMNS_UNSORTED: 163 a->sorted = PETSC_FALSE; 164 break; 165 case MAT_KEEP_ZEROED_ROWS: 166 a->keepzeroedrows = PETSC_TRUE; 167 break; 168 case MAT_NO_NEW_NONZERO_LOCATIONS: 169 a->nonew = 1; 170 break; 171 case MAT_NEW_NONZERO_LOCATION_ERR: 172 a->nonew = -1; 173 break; 174 case MAT_NEW_NONZERO_ALLOCATION_ERR: 175 a->nonew = -2; 176 break; 177 case MAT_YES_NEW_NONZERO_LOCATIONS: 178 a->nonew = 0; 179 break; 180 case MAT_ROWS_SORTED: 181 case MAT_ROWS_UNSORTED: 182 case MAT_YES_NEW_DIAGONALS: 183 case MAT_IGNORE_OFF_PROC_ENTRIES: 184 case MAT_USE_HASH_TABLE: 185 PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n"); 186 break; 187 case MAT_NO_NEW_DIAGONALS: 188 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 189 case MAT_NOT_SYMMETRIC: 190 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 191 case MAT_HERMITIAN: 192 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 193 case MAT_SYMMETRIC: 194 case MAT_STRUCTURALLY_SYMMETRIC: 195 case MAT_NOT_HERMITIAN: 196 case MAT_SYMMETRY_ETERNAL: 197 case MAT_NOT_SYMMETRY_ETERNAL: 198 break; 199 default: 200 SETERRQ(PETSC_ERR_SUP,"unknown option"); 201 } 202 PetscFunctionReturn(0); 203 } 204 205 #undef __FUNCT__ 206 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 207 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,int row,int *ncols,int **cols,PetscScalar **v) 208 { 209 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 210 PetscErrorCode ierr; 211 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2; 212 MatScalar *aa,*aa_i; 213 PetscScalar *v_i; 214 215 PetscFunctionBegin; 216 bs = a->bs; 217 ai = a->i; 218 aj = a->j; 219 aa = a->a; 220 bs2 = a->bs2; 221 222 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %d out of range", row); 223 224 bn = row/bs; /* Block number */ 225 bp = row % bs; /* Block position */ 226 M = ai[bn+1] - ai[bn]; 227 *ncols = bs*M; 228 229 if (v) { 230 *v = 0; 231 if (*ncols) { 232 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 233 for (i=0; i<M; i++) { /* for each block in the block row */ 234 v_i = *v + i*bs; 235 aa_i = aa + bs2*(ai[bn] + i); 236 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 237 } 238 } 239 } 240 241 if (cols) { 242 *cols = 0; 243 if (*ncols) { 244 ierr = PetscMalloc((*ncols+row)*sizeof(int),cols);CHKERRQ(ierr); 245 for (i=0; i<M; i++) { /* for each block in the block row */ 246 cols_i = *cols + i*bs; 247 itmp = bs*aj[ai[bn] + i]; 248 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 249 } 250 } 251 } 252 253 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 254 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 255 #ifdef column_search 256 v_i = *v + M*bs; 257 cols_i = *cols + M*bs; 258 for (i=0; i<bn; i++){ /* for each block row */ 259 M = ai[i+1] - ai[i]; 260 for (j=0; j<M; j++){ 261 itmp = aj[ai[i] + j]; /* block column value */ 262 if (itmp == bn){ 263 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 264 for (k=0; k<bs; k++) { 265 *cols_i++ = i*bs+k; 266 *v_i++ = aa_i[k]; 267 } 268 *ncols += bs; 269 break; 270 } 271 } 272 } 273 #endif 274 275 PetscFunctionReturn(0); 276 } 277 278 #undef __FUNCT__ 279 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 280 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 281 { 282 PetscErrorCode ierr; 283 284 PetscFunctionBegin; 285 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 286 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 287 PetscFunctionReturn(0); 288 } 289 290 #undef __FUNCT__ 291 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 292 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B) 293 { 294 PetscErrorCode ierr; 295 PetscFunctionBegin; 296 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 297 PetscFunctionReturn(0); 298 } 299 300 #undef __FUNCT__ 301 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 302 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 303 { 304 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 305 PetscErrorCode ierr; 306 int i,j,bs = a->bs,k,l,bs2=a->bs2; 307 char *name; 308 PetscViewerFormat format; 309 310 PetscFunctionBegin; 311 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 312 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 313 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 314 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 315 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 316 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 317 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 318 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 319 for (i=0; i<a->mbs; i++) { 320 for (j=0; j<bs; j++) { 321 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 322 for (k=a->i[i]; k<a->i[i+1]; k++) { 323 for (l=0; l<bs; l++) { 324 #if defined(PETSC_USE_COMPLEX) 325 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 326 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 327 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 328 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 329 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 330 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 331 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 332 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 333 } 334 #else 335 if (a->a[bs2*k + l*bs + j] != 0.0) { 336 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 337 } 338 #endif 339 } 340 } 341 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 342 } 343 } 344 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 345 } else { 346 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 347 for (i=0; i<a->mbs; i++) { 348 for (j=0; j<bs; j++) { 349 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 350 for (k=a->i[i]; k<a->i[i+1]; k++) { 351 for (l=0; l<bs; l++) { 352 #if defined(PETSC_USE_COMPLEX) 353 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 354 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 355 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 356 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 357 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 358 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 359 } else { 360 ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 361 } 362 #else 363 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 364 #endif 365 } 366 } 367 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 368 } 369 } 370 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 371 } 372 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 373 PetscFunctionReturn(0); 374 } 375 376 #undef __FUNCT__ 377 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 378 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 379 { 380 Mat A = (Mat) Aa; 381 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 382 PetscErrorCode ierr; 383 int row,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2,rank; 384 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 385 MatScalar *aa; 386 MPI_Comm comm; 387 PetscViewer viewer; 388 389 PetscFunctionBegin; 390 /* 391 This is nasty. If this is called from an originally parallel matrix 392 then all processes call this,but only the first has the matrix so the 393 rest should return immediately. 394 */ 395 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 396 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 397 if (rank) PetscFunctionReturn(0); 398 399 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 400 401 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 402 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 403 404 /* loop over matrix elements drawing boxes */ 405 color = PETSC_DRAW_BLUE; 406 for (i=0,row=0; i<mbs; i++,row+=bs) { 407 for (j=a->i[i]; j<a->i[i+1]; j++) { 408 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 409 x_l = a->j[j]*bs; x_r = x_l + 1.0; 410 aa = a->a + j*bs2; 411 for (k=0; k<bs; k++) { 412 for (l=0; l<bs; l++) { 413 if (PetscRealPart(*aa++) >= 0.) continue; 414 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 415 } 416 } 417 } 418 } 419 color = PETSC_DRAW_CYAN; 420 for (i=0,row=0; i<mbs; i++,row+=bs) { 421 for (j=a->i[i]; j<a->i[i+1]; j++) { 422 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 423 x_l = a->j[j]*bs; x_r = x_l + 1.0; 424 aa = a->a + j*bs2; 425 for (k=0; k<bs; k++) { 426 for (l=0; l<bs; l++) { 427 if (PetscRealPart(*aa++) != 0.) continue; 428 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 429 } 430 } 431 } 432 } 433 434 color = PETSC_DRAW_RED; 435 for (i=0,row=0; i<mbs; i++,row+=bs) { 436 for (j=a->i[i]; j<a->i[i+1]; j++) { 437 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 438 x_l = a->j[j]*bs; x_r = x_l + 1.0; 439 aa = a->a + j*bs2; 440 for (k=0; k<bs; k++) { 441 for (l=0; l<bs; l++) { 442 if (PetscRealPart(*aa++) <= 0.) continue; 443 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 444 } 445 } 446 } 447 } 448 PetscFunctionReturn(0); 449 } 450 451 #undef __FUNCT__ 452 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 453 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 454 { 455 PetscErrorCode ierr; 456 PetscReal xl,yl,xr,yr,w,h; 457 PetscDraw draw; 458 PetscTruth isnull; 459 460 PetscFunctionBegin; 461 462 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 463 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 464 465 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 466 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 467 xr += w; yr += h; xl = -w; yl = -h; 468 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 469 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 470 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 471 PetscFunctionReturn(0); 472 } 473 474 #undef __FUNCT__ 475 #define __FUNCT__ "MatView_SeqSBAIJ" 476 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 477 { 478 PetscErrorCode ierr; 479 PetscTruth iascii,isdraw; 480 481 PetscFunctionBegin; 482 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 483 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 484 if (iascii){ 485 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 486 } else if (isdraw) { 487 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 488 } else { 489 Mat B; 490 ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr); 491 ierr = MatView(B,viewer);CHKERRQ(ierr); 492 ierr = MatDestroy(B);CHKERRQ(ierr); 493 } 494 PetscFunctionReturn(0); 495 } 496 497 498 #undef __FUNCT__ 499 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 500 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[]) 501 { 502 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 503 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 504 int *ai = a->i,*ailen = a->ilen; 505 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 506 MatScalar *ap,*aa = a->a,zero = 0.0; 507 508 PetscFunctionBegin; 509 for (k=0; k<m; k++) { /* loop over rows */ 510 row = im[k]; brow = row/bs; 511 if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",row); 512 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1); 513 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 514 nrow = ailen[brow]; 515 for (l=0; l<n; l++) { /* loop over columns */ 516 if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",in[l]); 517 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1); 518 col = in[l] ; 519 bcol = col/bs; 520 cidx = col%bs; 521 ridx = row%bs; 522 high = nrow; 523 low = 0; /* assume unsorted */ 524 while (high-low > 5) { 525 t = (low+high)/2; 526 if (rp[t] > bcol) high = t; 527 else low = t; 528 } 529 for (i=low; i<high; i++) { 530 if (rp[i] > bcol) break; 531 if (rp[i] == bcol) { 532 *v++ = ap[bs2*i+bs*cidx+ridx]; 533 goto finished; 534 } 535 } 536 *v++ = zero; 537 finished:; 538 } 539 } 540 PetscFunctionReturn(0); 541 } 542 543 544 #undef __FUNCT__ 545 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 546 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 547 { 548 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 549 PetscErrorCode ierr; 550 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 551 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 552 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval; 553 PetscTruth roworiented=a->roworiented; 554 const MatScalar *value = v; 555 MatScalar *ap,*aa = a->a,*bap; 556 557 PetscFunctionBegin; 558 if (roworiented) { 559 stepval = (n-1)*bs; 560 } else { 561 stepval = (m-1)*bs; 562 } 563 for (k=0; k<m; k++) { /* loop over added rows */ 564 row = im[k]; 565 if (row < 0) continue; 566 #if defined(PETSC_USE_BOPT_g) 567 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1); 568 #endif 569 rp = aj + ai[row]; 570 ap = aa + bs2*ai[row]; 571 rmax = imax[row]; 572 nrow = ailen[row]; 573 low = 0; 574 for (l=0; l<n; l++) { /* loop over added columns */ 575 if (in[l] < 0) continue; 576 col = in[l]; 577 #if defined(PETSC_USE_BOPT_g) 578 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",col,a->nbs-1); 579 #endif 580 if (col < row) continue; /* ignore lower triangular block */ 581 if (roworiented) { 582 value = v + k*(stepval+bs)*bs + l*bs; 583 } else { 584 value = v + l*(stepval+bs)*bs + k*bs; 585 } 586 if (!sorted) low = 0; high = nrow; 587 while (high-low > 7) { 588 t = (low+high)/2; 589 if (rp[t] > col) high = t; 590 else low = t; 591 } 592 for (i=low; i<high; i++) { 593 if (rp[i] > col) break; 594 if (rp[i] == col) { 595 bap = ap + bs2*i; 596 if (roworiented) { 597 if (is == ADD_VALUES) { 598 for (ii=0; ii<bs; ii++,value+=stepval) { 599 for (jj=ii; jj<bs2; jj+=bs) { 600 bap[jj] += *value++; 601 } 602 } 603 } else { 604 for (ii=0; ii<bs; ii++,value+=stepval) { 605 for (jj=ii; jj<bs2; jj+=bs) { 606 bap[jj] = *value++; 607 } 608 } 609 } 610 } else { 611 if (is == ADD_VALUES) { 612 for (ii=0; ii<bs; ii++,value+=stepval) { 613 for (jj=0; jj<bs; jj++) { 614 *bap++ += *value++; 615 } 616 } 617 } else { 618 for (ii=0; ii<bs; ii++,value+=stepval) { 619 for (jj=0; jj<bs; jj++) { 620 *bap++ = *value++; 621 } 622 } 623 } 624 } 625 goto noinsert2; 626 } 627 } 628 if (nonew == 1) goto noinsert2; 629 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 630 if (nrow >= rmax) { 631 /* there is no extra room in row, therefore enlarge */ 632 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 633 MatScalar *new_a; 634 635 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 636 637 /* malloc new storage space */ 638 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 639 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 640 new_j = (int*)(new_a + bs2*new_nz); 641 new_i = new_j + new_nz; 642 643 /* copy over old data into new slots */ 644 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 645 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 646 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 647 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 648 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 649 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 650 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 651 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 652 /* free up old matrix storage */ 653 ierr = PetscFree(a->a);CHKERRQ(ierr); 654 if (!a->singlemalloc) { 655 ierr = PetscFree(a->i);CHKERRQ(ierr); 656 ierr = PetscFree(a->j);CHKERRQ(ierr); 657 } 658 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 659 a->singlemalloc = PETSC_TRUE; 660 661 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 662 rmax = imax[row] = imax[row] + CHUNKSIZE; 663 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 664 a->maxnz += bs2*CHUNKSIZE; 665 a->reallocs++; 666 a->nz++; 667 } 668 N = nrow++ - 1; 669 /* shift up all the later entries in this row */ 670 for (ii=N; ii>=i; ii--) { 671 rp[ii+1] = rp[ii]; 672 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 673 } 674 if (N >= i) { 675 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 676 } 677 rp[i] = col; 678 bap = ap + bs2*i; 679 if (roworiented) { 680 for (ii=0; ii<bs; ii++,value+=stepval) { 681 for (jj=ii; jj<bs2; jj+=bs) { 682 bap[jj] = *value++; 683 } 684 } 685 } else { 686 for (ii=0; ii<bs; ii++,value+=stepval) { 687 for (jj=0; jj<bs; jj++) { 688 *bap++ = *value++; 689 } 690 } 691 } 692 noinsert2:; 693 low = i; 694 } 695 ailen[row] = nrow; 696 } 697 PetscFunctionReturn(0); 698 } 699 700 #undef __FUNCT__ 701 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 702 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 703 { 704 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 705 PetscErrorCode ierr; 706 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 707 int m = A->m,*ip,N,*ailen = a->ilen; 708 int mbs = a->mbs,bs2 = a->bs2,rmax = 0; 709 MatScalar *aa = a->a,*ap; 710 711 PetscFunctionBegin; 712 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 713 714 if (m) rmax = ailen[0]; 715 for (i=1; i<mbs; i++) { 716 /* move each row back by the amount of empty slots (fshift) before it*/ 717 fshift += imax[i-1] - ailen[i-1]; 718 rmax = PetscMax(rmax,ailen[i]); 719 if (fshift) { 720 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 721 N = ailen[i]; 722 for (j=0; j<N; j++) { 723 ip[j-fshift] = ip[j]; 724 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 725 } 726 } 727 ai[i] = ai[i-1] + ailen[i-1]; 728 } 729 if (mbs) { 730 fshift += imax[mbs-1] - ailen[mbs-1]; 731 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 732 } 733 /* reset ilen and imax for each row */ 734 for (i=0; i<mbs; i++) { 735 ailen[i] = imax[i] = ai[i+1] - ai[i]; 736 } 737 a->nz = ai[mbs]; 738 739 /* diagonals may have moved, reset it */ 740 if (a->diag) { 741 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(int));CHKERRQ(ierr); 742 } 743 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 744 m,A->m,a->bs,fshift*bs2,a->nz*bs2); 745 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %d\n", 746 a->reallocs); 747 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 748 a->reallocs = 0; 749 A->info.nz_unneeded = (PetscReal)fshift*bs2; 750 751 PetscFunctionReturn(0); 752 } 753 754 /* 755 This function returns an array of flags which indicate the locations of contiguous 756 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 757 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 758 Assume: sizes should be long enough to hold all the values. 759 */ 760 #undef __FUNCT__ 761 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 762 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 763 { 764 int i,j,k,row; 765 PetscTruth flg; 766 767 PetscFunctionBegin; 768 for (i=0,j=0; i<n; j++) { 769 row = idx[i]; 770 if (row%bs!=0) { /* Not the begining of a block */ 771 sizes[j] = 1; 772 i++; 773 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 774 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 775 i++; 776 } else { /* Begining of the block, so check if the complete block exists */ 777 flg = PETSC_TRUE; 778 for (k=1; k<bs; k++) { 779 if (row+k != idx[i+k]) { /* break in the block */ 780 flg = PETSC_FALSE; 781 break; 782 } 783 } 784 if (flg == PETSC_TRUE) { /* No break in the bs */ 785 sizes[j] = bs; 786 i+= bs; 787 } else { 788 sizes[j] = 1; 789 i++; 790 } 791 } 792 } 793 *bs_max = j; 794 PetscFunctionReturn(0); 795 } 796 797 #undef __FUNCT__ 798 #define __FUNCT__ "MatZeroRows_SeqSBAIJ" 799 PetscErrorCode MatZeroRows_SeqSBAIJ(Mat A,IS is,const PetscScalar *diag) 800 { 801 PetscFunctionBegin; 802 SETERRQ(PETSC_ERR_SUP,"No support for this function yet"); 803 } 804 805 /* Only add/insert a(i,j) with i<=j (blocks). 806 Any a(i,j) with i>j input by user is ingored. 807 */ 808 809 #undef __FUNCT__ 810 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 811 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is) 812 { 813 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 814 PetscErrorCode ierr; 815 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 816 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 817 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 818 int ridx,cidx,bs2=a->bs2; 819 MatScalar *ap,value,*aa=a->a,*bap; 820 821 PetscFunctionBegin; 822 823 for (k=0; k<m; k++) { /* loop over added rows */ 824 row = im[k]; /* row number */ 825 brow = row/bs; /* block row number */ 826 if (row < 0) continue; 827 #if defined(PETSC_USE_BOPT_g) 828 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1); 829 #endif 830 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 831 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 832 rmax = imax[brow]; /* maximum space allocated for this row */ 833 nrow = ailen[brow]; /* actual length of this row */ 834 low = 0; 835 836 for (l=0; l<n; l++) { /* loop over added columns */ 837 if (in[l] < 0) continue; 838 #if defined(PETSC_USE_BOPT_g) 839 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->m-1); 840 #endif 841 col = in[l]; 842 bcol = col/bs; /* block col number */ 843 844 if (brow <= bcol){ 845 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 846 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 847 /* element value a(k,l) */ 848 if (roworiented) { 849 value = v[l + k*n]; 850 } else { 851 value = v[k + l*m]; 852 } 853 854 /* move pointer bap to a(k,l) quickly and add/insert value */ 855 if (!sorted) low = 0; high = nrow; 856 while (high-low > 7) { 857 t = (low+high)/2; 858 if (rp[t] > bcol) high = t; 859 else low = t; 860 } 861 for (i=low; i<high; i++) { 862 /* printf("The loop of i=low.., rp[%d]=%d\n",i,rp[i]); */ 863 if (rp[i] > bcol) break; 864 if (rp[i] == bcol) { 865 bap = ap + bs2*i + bs*cidx + ridx; 866 if (is == ADD_VALUES) *bap += value; 867 else *bap = value; 868 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 869 if (brow == bcol && ridx < cidx){ 870 bap = ap + bs2*i + bs*ridx + cidx; 871 if (is == ADD_VALUES) *bap += value; 872 else *bap = value; 873 } 874 goto noinsert1; 875 } 876 } 877 878 if (nonew == 1) goto noinsert1; 879 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 880 if (nrow >= rmax) { 881 /* there is no extra room in row, therefore enlarge */ 882 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 883 MatScalar *new_a; 884 885 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); 886 887 /* Malloc new storage space */ 888 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 889 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 890 new_j = (int*)(new_a + bs2*new_nz); 891 new_i = new_j + new_nz; 892 893 /* copy over old data into new slots */ 894 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 895 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 896 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 897 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 898 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 899 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 900 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 901 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 902 /* free up old matrix storage */ 903 ierr = PetscFree(a->a);CHKERRQ(ierr); 904 if (!a->singlemalloc) { 905 ierr = PetscFree(a->i);CHKERRQ(ierr); 906 ierr = PetscFree(a->j);CHKERRQ(ierr); 907 } 908 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 909 a->singlemalloc = PETSC_TRUE; 910 911 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 912 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 913 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 914 a->maxnz += bs2*CHUNKSIZE; 915 a->reallocs++; 916 a->nz++; 917 } 918 919 N = nrow++ - 1; 920 /* shift up all the later entries in this row */ 921 for (ii=N; ii>=i; ii--) { 922 rp[ii+1] = rp[ii]; 923 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 924 } 925 if (N>=i) { 926 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 927 } 928 rp[i] = bcol; 929 ap[bs2*i + bs*cidx + ridx] = value; 930 noinsert1:; 931 low = i; 932 /* } */ 933 } 934 } /* end of if .. if.. */ 935 } /* end of loop over added columns */ 936 ailen[brow] = nrow; 937 } /* end of loop over added rows */ 938 939 PetscFunctionReturn(0); 940 } 941 942 EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*); 943 EXTERN PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*); 944 EXTERN PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat,int,IS[],int); 945 EXTERN PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 946 EXTERN PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat*[]); 947 EXTERN PetscErrorCode MatScale_SeqSBAIJ(const PetscScalar*,Mat); 948 EXTERN PetscErrorCode MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 949 EXTERN PetscErrorCode MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 950 EXTERN PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat,Vec); 951 EXTERN PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 952 EXTERN PetscErrorCode MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 953 EXTERN PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat); 954 EXTERN PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat,Vec); 955 956 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 957 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 958 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 959 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 960 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 961 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 962 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 963 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 964 965 EXTERN PetscErrorCode MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs); 966 967 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 968 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 969 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 970 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 971 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 972 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 973 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 974 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 975 976 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*); 977 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*); 978 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*); 979 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*); 980 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*); 981 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*); 982 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*); 983 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*); 984 EXTERN PetscErrorCode MatGetInertia_SeqSBAIJ(Mat,int*,int*,int*); 985 986 EXTERN PetscErrorCode MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 987 EXTERN PetscErrorCode MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 988 EXTERN PetscErrorCode MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 989 EXTERN PetscErrorCode MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 990 EXTERN PetscErrorCode MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 991 EXTERN PetscErrorCode MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 992 EXTERN PetscErrorCode MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 993 EXTERN PetscErrorCode MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 994 995 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 996 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 997 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 998 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 999 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 1000 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 1001 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 1002 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 1003 1004 #ifdef HAVE_MatICCFactor 1005 /* modified from MatILUFactor_SeqSBAIJ, needs further work! */ 1006 #undef __FUNCT__ 1007 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 1008 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 1009 { 1010 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 1011 Mat outA; 1012 PetscErrorCode ierr; 1013 PetscTruth row_identity,col_identity; 1014 1015 PetscFunctionBegin; 1016 outA = inA; 1017 inA->factor = FACTOR_CHOLESKY; 1018 1019 if (!a->diag) { 1020 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 1021 } 1022 /* 1023 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1024 for ILU(0) factorization with natural ordering 1025 */ 1026 switch (a->bs) { 1027 case 1: 1028 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1029 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1030 inA->ops->solves = MatSolves_SeqSBAIJ_1; 1031 PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1032 case 2: 1033 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1034 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1035 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1036 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1037 break; 1038 case 3: 1039 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1040 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1041 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1042 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1043 break; 1044 case 4: 1045 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1046 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1047 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1048 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1049 break; 1050 case 5: 1051 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1052 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1053 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1054 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1055 break; 1056 case 6: 1057 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1058 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1059 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1060 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1061 break; 1062 case 7: 1063 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1064 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1065 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1066 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1067 break; 1068 default: 1069 a->row = row; 1070 a->icol = col; 1071 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1072 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1073 1074 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1075 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1076 PetscLogObjectParent(inA,a->icol); 1077 1078 if (!a->solve_work) { 1079 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1080 PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar)); 1081 } 1082 } 1083 1084 ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr); 1085 1086 PetscFunctionReturn(0); 1087 } 1088 #endif 1089 1090 #undef __FUNCT__ 1091 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1092 PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat A) 1093 { 1094 static PetscTruth called = PETSC_FALSE; 1095 MPI_Comm comm = A->comm; 1096 PetscErrorCode ierr; 1097 1098 PetscFunctionBegin; 1099 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1100 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1101 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1102 PetscFunctionReturn(0); 1103 } 1104 1105 EXTERN_C_BEGIN 1106 #undef __FUNCT__ 1107 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1108 PetscErrorCode MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,int *indices) 1109 { 1110 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1111 int i,nz,n; 1112 1113 PetscFunctionBegin; 1114 nz = baij->maxnz; 1115 n = mat->n; 1116 for (i=0; i<nz; i++) { 1117 baij->j[i] = indices[i]; 1118 } 1119 baij->nz = nz; 1120 for (i=0; i<n; i++) { 1121 baij->ilen[i] = baij->imax[i]; 1122 } 1123 1124 PetscFunctionReturn(0); 1125 } 1126 EXTERN_C_END 1127 1128 #undef __FUNCT__ 1129 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1130 /*@ 1131 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1132 in the matrix. 1133 1134 Input Parameters: 1135 + mat - the SeqSBAIJ matrix 1136 - indices - the column indices 1137 1138 Level: advanced 1139 1140 Notes: 1141 This can be called if you have precomputed the nonzero structure of the 1142 matrix and want to provide it to the matrix object to improve the performance 1143 of the MatSetValues() operation. 1144 1145 You MUST have set the correct numbers of nonzeros per row in the call to 1146 MatCreateSeqSBAIJ(). 1147 1148 MUST be called before any calls to MatSetValues() 1149 1150 .seealso: MatCreateSeqSBAIJ 1151 @*/ 1152 PetscErrorCode MatSeqSBAIJSetColumnIndices(Mat mat,int *indices) 1153 { 1154 PetscErrorCode ierr,(*f)(Mat,int *); 1155 1156 PetscFunctionBegin; 1157 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1158 PetscValidPointer(indices,2); 1159 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1160 if (f) { 1161 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1162 } else { 1163 SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices"); 1164 } 1165 PetscFunctionReturn(0); 1166 } 1167 1168 #undef __FUNCT__ 1169 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1170 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A) 1171 { 1172 PetscErrorCode ierr; 1173 1174 PetscFunctionBegin; 1175 ierr = MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1176 PetscFunctionReturn(0); 1177 } 1178 1179 #undef __FUNCT__ 1180 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1181 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1182 { 1183 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1184 PetscFunctionBegin; 1185 *array = a->a; 1186 PetscFunctionReturn(0); 1187 } 1188 1189 #undef __FUNCT__ 1190 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1191 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1192 { 1193 PetscFunctionBegin; 1194 PetscFunctionReturn(0); 1195 } 1196 1197 #include "petscblaslapack.h" 1198 #undef __FUNCT__ 1199 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1200 PetscErrorCode MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1201 { 1202 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1203 PetscErrorCode ierr; 1204 int i,bs=y->bs,bs2,j; 1205 PetscBLASInt bnz = (PetscBLASInt)x->nz,one = 1; 1206 1207 PetscFunctionBegin; 1208 if (str == SAME_NONZERO_PATTERN) { 1209 BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1210 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1211 if (y->xtoy && y->XtoY != X) { 1212 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1213 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1214 } 1215 if (!y->xtoy) { /* get xtoy */ 1216 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1217 y->XtoY = X; 1218 } 1219 bs2 = bs*bs; 1220 for (i=0; i<x->nz; i++) { 1221 j = 0; 1222 while (j < bs2){ 1223 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1224 j++; 1225 } 1226 } 1227 PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)); 1228 } else { 1229 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1230 } 1231 PetscFunctionReturn(0); 1232 } 1233 1234 #undef __FUNCT__ 1235 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1236 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1237 { 1238 PetscFunctionBegin; 1239 *flg = PETSC_TRUE; 1240 PetscFunctionReturn(0); 1241 } 1242 1243 #undef __FUNCT__ 1244 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1245 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1246 { 1247 PetscFunctionBegin; 1248 *flg = PETSC_TRUE; 1249 PetscFunctionReturn(0); 1250 } 1251 1252 #undef __FUNCT__ 1253 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1254 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg) 1255 { 1256 PetscFunctionBegin; 1257 *flg = PETSC_FALSE; 1258 PetscFunctionReturn(0); 1259 } 1260 1261 /* -------------------------------------------------------------------*/ 1262 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1263 MatGetRow_SeqSBAIJ, 1264 MatRestoreRow_SeqSBAIJ, 1265 MatMult_SeqSBAIJ_N, 1266 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1267 MatMult_SeqSBAIJ_N, 1268 MatMultAdd_SeqSBAIJ_N, 1269 MatSolve_SeqSBAIJ_N, 1270 0, 1271 0, 1272 /*10*/ 0, 1273 0, 1274 MatCholeskyFactor_SeqSBAIJ, 1275 MatRelax_SeqSBAIJ, 1276 MatTranspose_SeqSBAIJ, 1277 /*15*/ MatGetInfo_SeqSBAIJ, 1278 MatEqual_SeqSBAIJ, 1279 MatGetDiagonal_SeqSBAIJ, 1280 MatDiagonalScale_SeqSBAIJ, 1281 MatNorm_SeqSBAIJ, 1282 /*20*/ 0, 1283 MatAssemblyEnd_SeqSBAIJ, 1284 0, 1285 MatSetOption_SeqSBAIJ, 1286 MatZeroEntries_SeqSBAIJ, 1287 /*25*/ MatZeroRows_SeqSBAIJ, 1288 0, 1289 0, 1290 MatCholeskyFactorSymbolic_SeqSBAIJ, 1291 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1292 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1293 0, 1294 MatICCFactorSymbolic_SeqSBAIJ, 1295 MatGetArray_SeqSBAIJ, 1296 MatRestoreArray_SeqSBAIJ, 1297 /*35*/ MatDuplicate_SeqSBAIJ, 1298 0, 1299 0, 1300 0, 1301 0, 1302 /*40*/ MatAXPY_SeqSBAIJ, 1303 MatGetSubMatrices_SeqSBAIJ, 1304 MatIncreaseOverlap_SeqSBAIJ, 1305 MatGetValues_SeqSBAIJ, 1306 0, 1307 /*45*/ MatPrintHelp_SeqSBAIJ, 1308 MatScale_SeqSBAIJ, 1309 0, 1310 0, 1311 0, 1312 /*50*/ MatGetBlockSize_SeqSBAIJ, 1313 MatGetRowIJ_SeqSBAIJ, 1314 MatRestoreRowIJ_SeqSBAIJ, 1315 0, 1316 0, 1317 /*55*/ 0, 1318 0, 1319 0, 1320 0, 1321 MatSetValuesBlocked_SeqSBAIJ, 1322 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1323 0, 1324 0, 1325 MatGetPetscMaps_Petsc, 1326 0, 1327 /*65*/ 0, 1328 0, 1329 0, 1330 0, 1331 0, 1332 /*70*/ MatGetRowMax_SeqSBAIJ, 1333 0, 1334 0, 1335 0, 1336 0, 1337 /*75*/ 0, 1338 0, 1339 0, 1340 0, 1341 0, 1342 /*80*/ 0, 1343 0, 1344 0, 1345 #if !defined(PETSC_USE_COMPLEX) 1346 MatGetInertia_SeqSBAIJ, 1347 #else 1348 0, 1349 #endif 1350 MatLoad_SeqSBAIJ, 1351 /*85*/ MatIsSymmetric_SeqSBAIJ, 1352 MatIsHermitian_SeqSBAIJ, 1353 MatIsStructurallySymmetric_SeqSBAIJ, 1354 0, 1355 0, 1356 /*90*/ 0, 1357 0, 1358 0, 1359 0, 1360 0, 1361 /*95*/ 0, 1362 0, 1363 0, 1364 0}; 1365 1366 EXTERN_C_BEGIN 1367 #undef __FUNCT__ 1368 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1369 PetscErrorCode MatStoreValues_SeqSBAIJ(Mat mat) 1370 { 1371 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1372 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1373 PetscErrorCode ierr; 1374 1375 PetscFunctionBegin; 1376 if (aij->nonew != 1) { 1377 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1378 } 1379 1380 /* allocate space for values if not already there */ 1381 if (!aij->saved_values) { 1382 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1383 } 1384 1385 /* copy values over */ 1386 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1387 PetscFunctionReturn(0); 1388 } 1389 EXTERN_C_END 1390 1391 EXTERN_C_BEGIN 1392 #undef __FUNCT__ 1393 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1394 PetscErrorCode MatRetrieveValues_SeqSBAIJ(Mat mat) 1395 { 1396 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1397 PetscErrorCode ierr; 1398 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1399 1400 PetscFunctionBegin; 1401 if (aij->nonew != 1) { 1402 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1403 } 1404 if (!aij->saved_values) { 1405 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1406 } 1407 1408 /* copy values over */ 1409 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1410 PetscFunctionReturn(0); 1411 } 1412 EXTERN_C_END 1413 1414 EXTERN_C_BEGIN 1415 #undef __FUNCT__ 1416 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1417 PetscErrorCode MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,int bs,int nz,int *nnz) 1418 { 1419 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1420 PetscErrorCode ierr; 1421 int i,len,mbs,bs2; 1422 PetscTruth flg; 1423 1424 PetscFunctionBegin; 1425 B->preallocated = PETSC_TRUE; 1426 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1427 mbs = B->m/bs; 1428 bs2 = bs*bs; 1429 1430 if (mbs*bs != B->m) { 1431 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1432 } 1433 1434 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1435 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1436 if (nnz) { 1437 for (i=0; i<mbs; i++) { 1438 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1439 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],mbs); 1440 } 1441 } 1442 1443 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1444 if (!flg) { 1445 switch (bs) { 1446 case 1: 1447 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1448 B->ops->solve = MatSolve_SeqSBAIJ_1; 1449 B->ops->solves = MatSolves_SeqSBAIJ_1; 1450 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1451 B->ops->mult = MatMult_SeqSBAIJ_1; 1452 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1453 break; 1454 case 2: 1455 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1456 B->ops->solve = MatSolve_SeqSBAIJ_2; 1457 B->ops->solvetranspose = MatSolve_SeqSBAIJ_2; 1458 B->ops->mult = MatMult_SeqSBAIJ_2; 1459 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1460 break; 1461 case 3: 1462 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1463 B->ops->solve = MatSolve_SeqSBAIJ_3; 1464 B->ops->solvetranspose = MatSolve_SeqSBAIJ_3; 1465 B->ops->mult = MatMult_SeqSBAIJ_3; 1466 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1467 break; 1468 case 4: 1469 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1470 B->ops->solve = MatSolve_SeqSBAIJ_4; 1471 B->ops->solvetranspose = MatSolve_SeqSBAIJ_4; 1472 B->ops->mult = MatMult_SeqSBAIJ_4; 1473 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1474 break; 1475 case 5: 1476 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1477 B->ops->solve = MatSolve_SeqSBAIJ_5; 1478 B->ops->solvetranspose = MatSolve_SeqSBAIJ_5; 1479 B->ops->mult = MatMult_SeqSBAIJ_5; 1480 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1481 break; 1482 case 6: 1483 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1484 B->ops->solve = MatSolve_SeqSBAIJ_6; 1485 B->ops->solvetranspose = MatSolve_SeqSBAIJ_6; 1486 B->ops->mult = MatMult_SeqSBAIJ_6; 1487 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1488 break; 1489 case 7: 1490 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1491 B->ops->solve = MatSolve_SeqSBAIJ_7; 1492 B->ops->solvetranspose = MatSolve_SeqSBAIJ_7; 1493 B->ops->mult = MatMult_SeqSBAIJ_7; 1494 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1495 break; 1496 } 1497 } 1498 1499 b->mbs = mbs; 1500 b->nbs = mbs; 1501 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1502 if (!nnz) { 1503 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1504 else if (nz <= 0) nz = 1; 1505 for (i=0; i<mbs; i++) { 1506 b->imax[i] = nz; 1507 } 1508 nz = nz*mbs; /* total nz */ 1509 } else { 1510 nz = 0; 1511 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1512 } 1513 /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1514 1515 /* allocate the matrix space */ 1516 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1517 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1518 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1519 b->j = (int*)(b->a + nz*bs2); 1520 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 1521 b->i = b->j + nz; 1522 b->singlemalloc = PETSC_TRUE; 1523 1524 /* pointer to beginning of each row */ 1525 b->i[0] = 0; 1526 for (i=1; i<mbs+1; i++) { 1527 b->i[i] = b->i[i-1] + b->imax[i-1]; 1528 } 1529 1530 /* b->ilen will count nonzeros in each block row so far. */ 1531 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1532 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1533 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1534 1535 b->bs = bs; 1536 b->bs2 = bs2; 1537 b->nz = 0; 1538 b->maxnz = nz*bs2; 1539 1540 b->inew = 0; 1541 b->jnew = 0; 1542 b->anew = 0; 1543 b->a2anew = 0; 1544 b->permute = PETSC_FALSE; 1545 PetscFunctionReturn(0); 1546 } 1547 EXTERN_C_END 1548 1549 EXTERN_C_BEGIN 1550 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqAIJ(Mat,const MatType,Mat*); 1551 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqBAIJ(Mat,const MatType,Mat*); 1552 EXTERN_C_END 1553 1554 /*MC 1555 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1556 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1557 1558 Options Database Keys: 1559 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1560 1561 Level: beginner 1562 1563 .seealso: MatCreateSeqSBAIJ 1564 M*/ 1565 1566 EXTERN_C_BEGIN 1567 #undef __FUNCT__ 1568 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1569 PetscErrorCode MatCreate_SeqSBAIJ(Mat B) 1570 { 1571 Mat_SeqSBAIJ *b; 1572 PetscErrorCode ierr; 1573 int size; 1574 1575 PetscFunctionBegin; 1576 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1577 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1578 B->m = B->M = PetscMax(B->m,B->M); 1579 B->n = B->N = PetscMax(B->n,B->N); 1580 1581 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1582 B->data = (void*)b; 1583 ierr = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1584 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1585 B->ops->destroy = MatDestroy_SeqSBAIJ; 1586 B->ops->view = MatView_SeqSBAIJ; 1587 B->factor = 0; 1588 B->lupivotthreshold = 1.0; 1589 B->mapping = 0; 1590 b->row = 0; 1591 b->icol = 0; 1592 b->reallocs = 0; 1593 b->saved_values = 0; 1594 1595 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1596 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1597 1598 b->sorted = PETSC_FALSE; 1599 b->roworiented = PETSC_TRUE; 1600 b->nonew = 0; 1601 b->diag = 0; 1602 b->solve_work = 0; 1603 b->mult_work = 0; 1604 B->spptr = 0; 1605 b->keepzeroedrows = PETSC_FALSE; 1606 b->xtoy = 0; 1607 b->XtoY = 0; 1608 1609 b->inew = 0; 1610 b->jnew = 0; 1611 b->anew = 0; 1612 b->a2anew = 0; 1613 b->permute = PETSC_FALSE; 1614 1615 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1616 "MatStoreValues_SeqSBAIJ", 1617 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1618 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1619 "MatRetrieveValues_SeqSBAIJ", 1620 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1621 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1622 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1623 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1624 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C", 1625 "MatConvert_SeqSBAIJ_SeqAIJ", 1626 MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr); 1627 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C", 1628 "MatConvert_SeqSBAIJ_SeqBAIJ", 1629 MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr); 1630 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1631 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1632 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1633 1634 B->symmetric = PETSC_TRUE; 1635 B->structurally_symmetric = PETSC_TRUE; 1636 B->symmetric_set = PETSC_TRUE; 1637 B->structurally_symmetric_set = PETSC_TRUE; 1638 PetscFunctionReturn(0); 1639 } 1640 EXTERN_C_END 1641 1642 #undef __FUNCT__ 1643 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1644 /*@C 1645 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1646 compressed row) format. For good matrix assembly performance the 1647 user should preallocate the matrix storage by setting the parameter nz 1648 (or the array nnz). By setting these parameters accurately, performance 1649 during matrix assembly can be increased by more than a factor of 50. 1650 1651 Collective on Mat 1652 1653 Input Parameters: 1654 + A - the symmetric matrix 1655 . bs - size of block 1656 . nz - number of block nonzeros per block row (same for all rows) 1657 - nnz - array containing the number of block nonzeros in the upper triangular plus 1658 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1659 1660 Options Database Keys: 1661 . -mat_no_unroll - uses code that does not unroll the loops in the 1662 block calculations (much slower) 1663 . -mat_block_size - size of the blocks to use 1664 1665 Level: intermediate 1666 1667 Notes: 1668 Specify the preallocated storage with either nz or nnz (not both). 1669 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1670 allocation. For additional details, see the users manual chapter on 1671 matrices. 1672 1673 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1674 @*/ 1675 PetscErrorCode MatSeqSBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[]) { 1676 PetscErrorCode ierr,(*f)(Mat,int,int,const int[]); 1677 1678 PetscFunctionBegin; 1679 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1680 if (f) { 1681 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1682 } 1683 PetscFunctionReturn(0); 1684 } 1685 1686 #undef __FUNCT__ 1687 #define __FUNCT__ "MatCreateSeqSBAIJ" 1688 /*@C 1689 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1690 compressed row) format. For good matrix assembly performance the 1691 user should preallocate the matrix storage by setting the parameter nz 1692 (or the array nnz). By setting these parameters accurately, performance 1693 during matrix assembly can be increased by more than a factor of 50. 1694 1695 Collective on MPI_Comm 1696 1697 Input Parameters: 1698 + comm - MPI communicator, set to PETSC_COMM_SELF 1699 . bs - size of block 1700 . m - number of rows, or number of columns 1701 . nz - number of block nonzeros per block row (same for all rows) 1702 - nnz - array containing the number of block nonzeros in the upper triangular plus 1703 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1704 1705 Output Parameter: 1706 . A - the symmetric matrix 1707 1708 Options Database Keys: 1709 . -mat_no_unroll - uses code that does not unroll the loops in the 1710 block calculations (much slower) 1711 . -mat_block_size - size of the blocks to use 1712 1713 Level: intermediate 1714 1715 Notes: 1716 1717 Specify the preallocated storage with either nz or nnz (not both). 1718 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1719 allocation. For additional details, see the users manual chapter on 1720 matrices. 1721 1722 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1723 @*/ 1724 PetscErrorCode MatCreateSeqSBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A) 1725 { 1726 PetscErrorCode ierr; 1727 1728 PetscFunctionBegin; 1729 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1730 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1731 ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1732 PetscFunctionReturn(0); 1733 } 1734 1735 #undef __FUNCT__ 1736 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1737 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1738 { 1739 Mat C; 1740 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1741 PetscErrorCode ierr; 1742 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1743 1744 PetscFunctionBegin; 1745 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1746 1747 *B = 0; 1748 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1749 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1750 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1751 c = (Mat_SeqSBAIJ*)C->data; 1752 1753 C->preallocated = PETSC_TRUE; 1754 C->factor = A->factor; 1755 c->row = 0; 1756 c->icol = 0; 1757 c->saved_values = 0; 1758 c->keepzeroedrows = a->keepzeroedrows; 1759 C->assembled = PETSC_TRUE; 1760 1761 C->M = A->M; 1762 C->N = A->N; 1763 c->bs = a->bs; 1764 c->bs2 = a->bs2; 1765 c->mbs = a->mbs; 1766 c->nbs = a->nbs; 1767 1768 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1769 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1770 for (i=0; i<mbs; i++) { 1771 c->imax[i] = a->imax[i]; 1772 c->ilen[i] = a->ilen[i]; 1773 } 1774 1775 /* allocate the matrix space */ 1776 c->singlemalloc = PETSC_TRUE; 1777 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1778 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1779 c->j = (int*)(c->a + nz*bs2); 1780 c->i = c->j + nz; 1781 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1782 if (mbs > 0) { 1783 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1784 if (cpvalues == MAT_COPY_VALUES) { 1785 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1786 } else { 1787 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1788 } 1789 } 1790 1791 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1792 c->sorted = a->sorted; 1793 c->roworiented = a->roworiented; 1794 c->nonew = a->nonew; 1795 1796 if (a->diag) { 1797 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1798 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1799 for (i=0; i<mbs; i++) { 1800 c->diag[i] = a->diag[i]; 1801 } 1802 } else c->diag = 0; 1803 c->nz = a->nz; 1804 c->maxnz = a->maxnz; 1805 c->solve_work = 0; 1806 c->mult_work = 0; 1807 *B = C; 1808 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1809 PetscFunctionReturn(0); 1810 } 1811 1812 #undef __FUNCT__ 1813 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1814 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer,const MatType type,Mat *A) 1815 { 1816 Mat_SeqSBAIJ *a; 1817 Mat B; 1818 PetscErrorCode ierr; 1819 int i,nz,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1820 int *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1821 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1822 int *masked,nmask,tmp,bs2,ishift; 1823 PetscScalar *aa; 1824 MPI_Comm comm = ((PetscObject)viewer)->comm; 1825 1826 PetscFunctionBegin; 1827 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1828 bs2 = bs*bs; 1829 1830 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1831 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1832 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1833 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1834 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1835 M = header[1]; N = header[2]; nz = header[3]; 1836 1837 if (header[3] < 0) { 1838 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1839 } 1840 1841 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1842 1843 /* 1844 This code adds extra rows to make sure the number of rows is 1845 divisible by the blocksize 1846 */ 1847 mbs = M/bs; 1848 extra_rows = bs - M + bs*(mbs); 1849 if (extra_rows == bs) extra_rows = 0; 1850 else mbs++; 1851 if (extra_rows) { 1852 PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"); 1853 } 1854 1855 /* read in row lengths */ 1856 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1857 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1858 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1859 1860 /* read in column indices */ 1861 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1862 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1863 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1864 1865 /* loop over row lengths determining block row lengths */ 1866 ierr = PetscMalloc(mbs*sizeof(int),&s_browlengths);CHKERRQ(ierr); 1867 ierr = PetscMemzero(s_browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1868 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1869 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1870 masked = mask + mbs; 1871 rowcount = 0; nzcount = 0; 1872 for (i=0; i<mbs; i++) { 1873 nmask = 0; 1874 for (j=0; j<bs; j++) { 1875 kmax = rowlengths[rowcount]; 1876 for (k=0; k<kmax; k++) { 1877 tmp = jj[nzcount++]/bs; /* block col. index */ 1878 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1879 } 1880 rowcount++; 1881 } 1882 s_browlengths[i] += nmask; 1883 1884 /* zero out the mask elements we set */ 1885 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1886 } 1887 1888 /* create our matrix */ 1889 ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr); 1890 ierr = MatSetType(B,type);CHKERRQ(ierr); 1891 ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr); 1892 a = (Mat_SeqSBAIJ*)B->data; 1893 1894 /* set matrix "i" values */ 1895 a->i[0] = 0; 1896 for (i=1; i<= mbs; i++) { 1897 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1898 a->ilen[i-1] = s_browlengths[i-1]; 1899 } 1900 a->nz = a->i[mbs]; 1901 1902 /* read in nonzero values */ 1903 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1904 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1905 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1906 1907 /* set "a" and "j" values into matrix */ 1908 nzcount = 0; jcount = 0; 1909 for (i=0; i<mbs; i++) { 1910 nzcountb = nzcount; 1911 nmask = 0; 1912 for (j=0; j<bs; j++) { 1913 kmax = rowlengths[i*bs+j]; 1914 for (k=0; k<kmax; k++) { 1915 tmp = jj[nzcount++]/bs; /* block col. index */ 1916 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1917 } 1918 } 1919 /* sort the masked values */ 1920 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1921 1922 /* set "j" values into matrix */ 1923 maskcount = 1; 1924 for (j=0; j<nmask; j++) { 1925 a->j[jcount++] = masked[j]; 1926 mask[masked[j]] = maskcount++; 1927 } 1928 1929 /* set "a" values into matrix */ 1930 ishift = bs2*a->i[i]; 1931 for (j=0; j<bs; j++) { 1932 kmax = rowlengths[i*bs+j]; 1933 for (k=0; k<kmax; k++) { 1934 tmp = jj[nzcountb]/bs ; /* block col. index */ 1935 if (tmp >= i){ 1936 block = mask[tmp] - 1; 1937 point = jj[nzcountb] - bs*tmp; 1938 idx = ishift + bs2*block + j + bs*point; 1939 a->a[idx] = aa[nzcountb]; 1940 } 1941 nzcountb++; 1942 } 1943 } 1944 /* zero out the mask elements we set */ 1945 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1946 } 1947 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1948 1949 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1950 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1951 ierr = PetscFree(aa);CHKERRQ(ierr); 1952 ierr = PetscFree(jj);CHKERRQ(ierr); 1953 ierr = PetscFree(mask);CHKERRQ(ierr); 1954 1955 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1956 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1957 ierr = MatView_Private(B);CHKERRQ(ierr); 1958 *A = B; 1959 PetscFunctionReturn(0); 1960 } 1961 1962 #undef __FUNCT__ 1963 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1964 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx) 1965 { 1966 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1967 MatScalar *aa=a->a,*v,*v1; 1968 PetscScalar *x,*b,*t,sum,d; 1969 PetscErrorCode ierr; 1970 int m=a->mbs,bs=a->bs,*ai=a->i,*aj=a->j; 1971 int nz,nz1,*vj,*vj1,i; 1972 1973 PetscFunctionBegin; 1974 its = its*lits; 1975 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits); 1976 1977 if (bs > 1) 1978 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1979 1980 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1981 if (xx != bb) { 1982 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1983 } else { 1984 b = x; 1985 } 1986 1987 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1988 1989 if (flag & SOR_ZERO_INITIAL_GUESS) { 1990 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1991 for (i=0; i<m; i++) 1992 t[i] = b[i]; 1993 1994 for (i=0; i<m; i++){ 1995 d = *(aa + ai[i]); /* diag[i] */ 1996 v = aa + ai[i] + 1; 1997 vj = aj + ai[i] + 1; 1998 nz = ai[i+1] - ai[i] - 1; 1999 x[i] = omega*t[i]/d; 2000 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 2001 PetscLogFlops(2*nz-1); 2002 } 2003 } 2004 2005 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2006 for (i=0; i<m; i++) 2007 t[i] = b[i]; 2008 2009 for (i=0; i<m-1; i++){ /* update rhs */ 2010 v = aa + ai[i] + 1; 2011 vj = aj + ai[i] + 1; 2012 nz = ai[i+1] - ai[i] - 1; 2013 while (nz--) t[*vj++] -= x[i]*(*v++); 2014 PetscLogFlops(2*nz-1); 2015 } 2016 for (i=m-1; i>=0; i--){ 2017 d = *(aa + ai[i]); 2018 v = aa + ai[i] + 1; 2019 vj = aj + ai[i] + 1; 2020 nz = ai[i+1] - ai[i] - 1; 2021 sum = t[i]; 2022 while (nz--) sum -= x[*vj++]*(*v++); 2023 PetscLogFlops(2*nz-1); 2024 x[i] = (1-omega)*x[i] + omega*sum/d; 2025 } 2026 } 2027 its--; 2028 } 2029 2030 while (its--) { 2031 /* 2032 forward sweep: 2033 for i=0,...,m-1: 2034 sum[i] = (b[i] - U(i,:)x )/d[i]; 2035 x[i] = (1-omega)x[i] + omega*sum[i]; 2036 b = b - x[i]*U^T(i,:); 2037 2038 */ 2039 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2040 for (i=0; i<m; i++) 2041 t[i] = b[i]; 2042 2043 for (i=0; i<m; i++){ 2044 d = *(aa + ai[i]); /* diag[i] */ 2045 v = aa + ai[i] + 1; v1=v; 2046 vj = aj + ai[i] + 1; vj1=vj; 2047 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2048 sum = t[i]; 2049 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2050 x[i] = (1-omega)*x[i] + omega*sum/d; 2051 while (nz--) t[*vj++] -= x[i]*(*v++); 2052 PetscLogFlops(4*nz-2); 2053 } 2054 } 2055 2056 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2057 /* 2058 backward sweep: 2059 b = b - x[i]*U^T(i,:), i=0,...,n-2 2060 for i=m-1,...,0: 2061 sum[i] = (b[i] - U(i,:)x )/d[i]; 2062 x[i] = (1-omega)x[i] + omega*sum[i]; 2063 */ 2064 for (i=0; i<m; i++) 2065 t[i] = b[i]; 2066 2067 for (i=0; i<m-1; i++){ /* update rhs */ 2068 v = aa + ai[i] + 1; 2069 vj = aj + ai[i] + 1; 2070 nz = ai[i+1] - ai[i] - 1; 2071 while (nz--) t[*vj++] -= x[i]*(*v++); 2072 PetscLogFlops(2*nz-1); 2073 } 2074 for (i=m-1; i>=0; i--){ 2075 d = *(aa + ai[i]); 2076 v = aa + ai[i] + 1; 2077 vj = aj + ai[i] + 1; 2078 nz = ai[i+1] - ai[i] - 1; 2079 sum = t[i]; 2080 while (nz--) sum -= x[*vj++]*(*v++); 2081 PetscLogFlops(2*nz-1); 2082 x[i] = (1-omega)*x[i] + omega*sum/d; 2083 } 2084 } 2085 } 2086 2087 ierr = PetscFree(t);CHKERRQ(ierr); 2088 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2089 if (bb != xx) { 2090 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2091 } 2092 2093 PetscFunctionReturn(0); 2094 } 2095 2096 2097 2098 2099 2100 2101