1 2 /* 3 Defines the basic matrix operations for the SBAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 7 #include "src/inline/spops.h" 8 #include "src/mat/impls/sbaij/seq/sbaij.h" 9 10 #define CHUNKSIZE 10 11 12 /* 13 Checks for missing diagonals 14 */ 15 #undef __FUNCT__ 16 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 17 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A) 18 { 19 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 20 PetscErrorCode ierr; 21 PetscInt *diag,*jj = a->j,i; 22 23 PetscFunctionBegin; 24 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 25 diag = a->diag; 26 for (i=0; i<a->mbs; i++) { 27 if (jj[diag[i]] != i) SETERRQ1(PETSC_ERR_ARG_CORRUPT,"Matrix is missing diagonal number %D",i); 28 } 29 PetscFunctionReturn(0); 30 } 31 32 #undef __FUNCT__ 33 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 34 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A) 35 { 36 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 37 PetscErrorCode ierr; 38 PetscInt i,mbs = a->mbs; 39 40 PetscFunctionBegin; 41 if (a->diag) PetscFunctionReturn(0); 42 43 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 44 ierr = PetscLogObjectMemory(A,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 45 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 46 PetscFunctionReturn(0); 47 } 48 49 #undef __FUNCT__ 50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 51 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 52 { 53 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 54 PetscInt n = a->mbs,i; 55 56 PetscFunctionBegin; 57 *nn = n; 58 if (!ia) PetscFunctionReturn(0); 59 60 if (oshift == 1) { 61 /* temporarily add 1 to i and j indices */ 62 PetscInt nz = a->i[n]; 63 for (i=0; i<nz; i++) a->j[i]++; 64 for (i=0; i<n+1; i++) a->i[i]++; 65 *ia = a->i; *ja = a->j; 66 } else { 67 *ia = a->i; *ja = a->j; 68 } 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 74 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 75 { 76 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 77 PetscInt i,n = a->mbs; 78 79 PetscFunctionBegin; 80 if (!ia) PetscFunctionReturn(0); 81 82 if (oshift == 1) { 83 PetscInt nz = a->i[n]-1; 84 for (i=0; i<nz; i++) a->j[i]--; 85 for (i=0; i<n+1; i++) a->i[i]--; 86 } 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 92 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A) 93 { 94 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 95 PetscErrorCode ierr; 96 97 PetscFunctionBegin; 98 PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->m,a->nz); 99 ierr = PetscFree(a->a);CHKERRQ(ierr); 100 if (!a->singlemalloc) { 101 ierr = PetscFree(a->i);CHKERRQ(ierr); 102 ierr = PetscFree(a->j);CHKERRQ(ierr); 103 } 104 if (a->row) { 105 ierr = ISDestroy(a->row);CHKERRQ(ierr); 106 } 107 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 108 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 109 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 110 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 111 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 112 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 113 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 114 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 115 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 116 117 if (a->inew){ 118 ierr = PetscFree(a->inew);CHKERRQ(ierr); 119 a->inew = 0; 120 } 121 ierr = PetscFree(a);CHKERRQ(ierr); 122 123 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 124 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 125 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 126 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 127 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr); 128 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 129 PetscFunctionReturn(0); 130 } 131 132 #undef __FUNCT__ 133 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 134 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op) 135 { 136 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 137 138 PetscFunctionBegin; 139 switch (op) { 140 case MAT_ROW_ORIENTED: 141 a->roworiented = PETSC_TRUE; 142 break; 143 case MAT_COLUMN_ORIENTED: 144 a->roworiented = PETSC_FALSE; 145 break; 146 case MAT_COLUMNS_SORTED: 147 a->sorted = PETSC_TRUE; 148 break; 149 case MAT_COLUMNS_UNSORTED: 150 a->sorted = PETSC_FALSE; 151 break; 152 case MAT_KEEP_ZEROED_ROWS: 153 a->keepzeroedrows = PETSC_TRUE; 154 break; 155 case MAT_NO_NEW_NONZERO_LOCATIONS: 156 a->nonew = 1; 157 break; 158 case MAT_NEW_NONZERO_LOCATION_ERR: 159 a->nonew = -1; 160 break; 161 case MAT_NEW_NONZERO_ALLOCATION_ERR: 162 a->nonew = -2; 163 break; 164 case MAT_YES_NEW_NONZERO_LOCATIONS: 165 a->nonew = 0; 166 break; 167 case MAT_ROWS_SORTED: 168 case MAT_ROWS_UNSORTED: 169 case MAT_YES_NEW_DIAGONALS: 170 case MAT_IGNORE_OFF_PROC_ENTRIES: 171 case MAT_USE_HASH_TABLE: 172 PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n"); 173 break; 174 case MAT_NO_NEW_DIAGONALS: 175 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 176 case MAT_NOT_SYMMETRIC: 177 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 178 case MAT_HERMITIAN: 179 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 180 case MAT_SYMMETRIC: 181 case MAT_STRUCTURALLY_SYMMETRIC: 182 case MAT_NOT_HERMITIAN: 183 case MAT_SYMMETRY_ETERNAL: 184 case MAT_NOT_SYMMETRY_ETERNAL: 185 break; 186 default: 187 SETERRQ(PETSC_ERR_SUP,"unknown option"); 188 } 189 PetscFunctionReturn(0); 190 } 191 192 #undef __FUNCT__ 193 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 194 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v) 195 { 196 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 197 PetscErrorCode ierr; 198 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2; 199 MatScalar *aa,*aa_i; 200 PetscScalar *v_i; 201 202 PetscFunctionBegin; 203 bs = A->bs; 204 ai = a->i; 205 aj = a->j; 206 aa = a->a; 207 bs2 = a->bs2; 208 209 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row); 210 211 bn = row/bs; /* Block number */ 212 bp = row % bs; /* Block position */ 213 M = ai[bn+1] - ai[bn]; 214 *ncols = bs*M; 215 216 if (v) { 217 *v = 0; 218 if (*ncols) { 219 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 220 for (i=0; i<M; i++) { /* for each block in the block row */ 221 v_i = *v + i*bs; 222 aa_i = aa + bs2*(ai[bn] + i); 223 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 224 } 225 } 226 } 227 228 if (cols) { 229 *cols = 0; 230 if (*ncols) { 231 ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr); 232 for (i=0; i<M; i++) { /* for each block in the block row */ 233 cols_i = *cols + i*bs; 234 itmp = bs*aj[ai[bn] + i]; 235 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 236 } 237 } 238 } 239 240 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 241 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 242 #ifdef column_search 243 v_i = *v + M*bs; 244 cols_i = *cols + M*bs; 245 for (i=0; i<bn; i++){ /* for each block row */ 246 M = ai[i+1] - ai[i]; 247 for (j=0; j<M; j++){ 248 itmp = aj[ai[i] + j]; /* block column value */ 249 if (itmp == bn){ 250 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 251 for (k=0; k<bs; k++) { 252 *cols_i++ = i*bs+k; 253 *v_i++ = aa_i[k]; 254 } 255 *ncols += bs; 256 break; 257 } 258 } 259 } 260 #endif 261 PetscFunctionReturn(0); 262 } 263 264 #undef __FUNCT__ 265 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 266 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 267 { 268 PetscErrorCode ierr; 269 270 PetscFunctionBegin; 271 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 272 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 273 PetscFunctionReturn(0); 274 } 275 276 #undef __FUNCT__ 277 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 278 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B) 279 { 280 PetscErrorCode ierr; 281 PetscFunctionBegin; 282 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 283 PetscFunctionReturn(0); 284 } 285 286 #undef __FUNCT__ 287 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 288 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 289 { 290 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 291 PetscErrorCode ierr; 292 PetscInt i,j,bs = A->bs,k,l,bs2=a->bs2; 293 char *name; 294 PetscViewerFormat format; 295 296 PetscFunctionBegin; 297 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 298 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 299 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 300 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 301 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 302 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 303 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 304 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 305 for (i=0; i<a->mbs; i++) { 306 for (j=0; j<bs; j++) { 307 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 308 for (k=a->i[i]; k<a->i[i+1]; k++) { 309 for (l=0; l<bs; l++) { 310 #if defined(PETSC_USE_COMPLEX) 311 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 312 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 313 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 314 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 315 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 316 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 317 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 318 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 319 } 320 #else 321 if (a->a[bs2*k + l*bs + j] != 0.0) { 322 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 323 } 324 #endif 325 } 326 } 327 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 328 } 329 } 330 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 331 } else { 332 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 333 for (i=0; i<a->mbs; i++) { 334 for (j=0; j<bs; j++) { 335 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 336 for (k=a->i[i]; k<a->i[i+1]; k++) { 337 for (l=0; l<bs; l++) { 338 #if defined(PETSC_USE_COMPLEX) 339 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 340 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 341 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 342 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 343 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 344 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 345 } else { 346 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 347 } 348 #else 349 ierr = PetscViewerASCIIPrintf(viewer," %D %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 350 #endif 351 } 352 } 353 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 354 } 355 } 356 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 357 } 358 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 359 PetscFunctionReturn(0); 360 } 361 362 #undef __FUNCT__ 363 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 364 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 365 { 366 Mat A = (Mat) Aa; 367 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 368 PetscErrorCode ierr; 369 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2; 370 PetscMPIInt rank; 371 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 372 MatScalar *aa; 373 MPI_Comm comm; 374 PetscViewer viewer; 375 376 PetscFunctionBegin; 377 /* 378 This is nasty. If this is called from an originally parallel matrix 379 then all processes call this,but only the first has the matrix so the 380 rest should return immediately. 381 */ 382 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 383 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 384 if (rank) PetscFunctionReturn(0); 385 386 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 387 388 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 389 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 390 391 /* loop over matrix elements drawing boxes */ 392 color = PETSC_DRAW_BLUE; 393 for (i=0,row=0; i<mbs; i++,row+=bs) { 394 for (j=a->i[i]; j<a->i[i+1]; j++) { 395 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 396 x_l = a->j[j]*bs; x_r = x_l + 1.0; 397 aa = a->a + j*bs2; 398 for (k=0; k<bs; k++) { 399 for (l=0; l<bs; l++) { 400 if (PetscRealPart(*aa++) >= 0.) continue; 401 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 402 } 403 } 404 } 405 } 406 color = PETSC_DRAW_CYAN; 407 for (i=0,row=0; i<mbs; i++,row+=bs) { 408 for (j=a->i[i]; j<a->i[i+1]; j++) { 409 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 410 x_l = a->j[j]*bs; x_r = x_l + 1.0; 411 aa = a->a + j*bs2; 412 for (k=0; k<bs; k++) { 413 for (l=0; l<bs; l++) { 414 if (PetscRealPart(*aa++) != 0.) continue; 415 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 416 } 417 } 418 } 419 } 420 421 color = PETSC_DRAW_RED; 422 for (i=0,row=0; i<mbs; i++,row+=bs) { 423 for (j=a->i[i]; j<a->i[i+1]; j++) { 424 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 425 x_l = a->j[j]*bs; x_r = x_l + 1.0; 426 aa = a->a + j*bs2; 427 for (k=0; k<bs; k++) { 428 for (l=0; l<bs; l++) { 429 if (PetscRealPart(*aa++) <= 0.) continue; 430 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 431 } 432 } 433 } 434 } 435 PetscFunctionReturn(0); 436 } 437 438 #undef __FUNCT__ 439 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 440 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 441 { 442 PetscErrorCode ierr; 443 PetscReal xl,yl,xr,yr,w,h; 444 PetscDraw draw; 445 PetscTruth isnull; 446 447 PetscFunctionBegin; 448 449 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 450 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 451 452 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 453 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 454 xr += w; yr += h; xl = -w; yl = -h; 455 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 456 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 457 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 458 PetscFunctionReturn(0); 459 } 460 461 #undef __FUNCT__ 462 #define __FUNCT__ "MatView_SeqSBAIJ" 463 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 464 { 465 PetscErrorCode ierr; 466 PetscTruth iascii,isdraw; 467 468 PetscFunctionBegin; 469 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 470 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 471 if (iascii){ 472 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 473 } else if (isdraw) { 474 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 475 } else { 476 Mat B; 477 ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr); 478 ierr = MatView(B,viewer);CHKERRQ(ierr); 479 ierr = MatDestroy(B);CHKERRQ(ierr); 480 } 481 PetscFunctionReturn(0); 482 } 483 484 485 #undef __FUNCT__ 486 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 487 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 488 { 489 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 490 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 491 PetscInt *ai = a->i,*ailen = a->ilen; 492 PetscInt brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2; 493 MatScalar *ap,*aa = a->a,zero = 0.0; 494 495 PetscFunctionBegin; 496 for (k=0; k<m; k++) { /* loop over rows */ 497 row = im[k]; brow = row/bs; 498 if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row); 499 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 500 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 501 nrow = ailen[brow]; 502 for (l=0; l<n; l++) { /* loop over columns */ 503 if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]); 504 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1); 505 col = in[l] ; 506 bcol = col/bs; 507 cidx = col%bs; 508 ridx = row%bs; 509 high = nrow; 510 low = 0; /* assume unsorted */ 511 while (high-low > 5) { 512 t = (low+high)/2; 513 if (rp[t] > bcol) high = t; 514 else low = t; 515 } 516 for (i=low; i<high; i++) { 517 if (rp[i] > bcol) break; 518 if (rp[i] == bcol) { 519 *v++ = ap[bs2*i+bs*cidx+ridx]; 520 goto finished; 521 } 522 } 523 *v++ = zero; 524 finished:; 525 } 526 } 527 PetscFunctionReturn(0); 528 } 529 530 531 #undef __FUNCT__ 532 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 533 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 534 { 535 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 536 PetscErrorCode ierr; 537 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 538 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 539 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval; 540 PetscTruth roworiented=a->roworiented; 541 const MatScalar *value = v; 542 MatScalar *ap,*aa = a->a,*bap; 543 544 PetscFunctionBegin; 545 if (roworiented) { 546 stepval = (n-1)*bs; 547 } else { 548 stepval = (m-1)*bs; 549 } 550 for (k=0; k<m; k++) { /* loop over added rows */ 551 row = im[k]; 552 if (row < 0) continue; 553 #if defined(PETSC_USE_DEBUG) 554 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 555 #endif 556 rp = aj + ai[row]; 557 ap = aa + bs2*ai[row]; 558 rmax = imax[row]; 559 nrow = ailen[row]; 560 low = 0; 561 for (l=0; l<n; l++) { /* loop over added columns */ 562 if (in[l] < 0) continue; 563 col = in[l]; 564 #if defined(PETSC_USE_DEBUG) 565 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1); 566 #endif 567 if (col < row) continue; /* ignore lower triangular block */ 568 if (roworiented) { 569 value = v + k*(stepval+bs)*bs + l*bs; 570 } else { 571 value = v + l*(stepval+bs)*bs + k*bs; 572 } 573 if (col < lastcol) low = 0; high = nrow; 574 lastcol = col; 575 while (high-low > 7) { 576 t = (low+high)/2; 577 if (rp[t] > col) high = t; 578 else low = t; 579 } 580 for (i=low; i<high; i++) { 581 if (rp[i] > col) break; 582 if (rp[i] == col) { 583 bap = ap + bs2*i; 584 if (roworiented) { 585 if (is == ADD_VALUES) { 586 for (ii=0; ii<bs; ii++,value+=stepval) { 587 for (jj=ii; jj<bs2; jj+=bs) { 588 bap[jj] += *value++; 589 } 590 } 591 } else { 592 for (ii=0; ii<bs; ii++,value+=stepval) { 593 for (jj=ii; jj<bs2; jj+=bs) { 594 bap[jj] = *value++; 595 } 596 } 597 } 598 } else { 599 if (is == ADD_VALUES) { 600 for (ii=0; ii<bs; ii++,value+=stepval) { 601 for (jj=0; jj<bs; jj++) { 602 *bap++ += *value++; 603 } 604 } 605 } else { 606 for (ii=0; ii<bs; ii++,value+=stepval) { 607 for (jj=0; jj<bs; jj++) { 608 *bap++ = *value++; 609 } 610 } 611 } 612 } 613 goto noinsert2; 614 } 615 } 616 if (nonew == 1) goto noinsert2; 617 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 618 if (nrow >= rmax) { 619 /* there is no extra room in row, therefore enlarge */ 620 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 621 MatScalar *new_a; 622 623 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 624 625 /* malloc new storage space */ 626 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); 627 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 628 new_j = (PetscInt*)(new_a + bs2*new_nz); 629 new_i = new_j + new_nz; 630 631 /* copy over old data into new slots */ 632 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 633 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 634 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 635 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 636 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 637 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 638 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 639 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 640 /* free up old matrix storage */ 641 ierr = PetscFree(a->a);CHKERRQ(ierr); 642 if (!a->singlemalloc) { 643 ierr = PetscFree(a->i);CHKERRQ(ierr); 644 ierr = PetscFree(a->j);CHKERRQ(ierr); 645 } 646 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 647 a->singlemalloc = PETSC_TRUE; 648 649 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 650 rmax = imax[row] = imax[row] + CHUNKSIZE; 651 ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); 652 a->maxnz += bs2*CHUNKSIZE; 653 a->reallocs++; 654 a->nz++; 655 } 656 N = nrow++ - 1; 657 /* shift up all the later entries in this row */ 658 for (ii=N; ii>=i; ii--) { 659 rp[ii+1] = rp[ii]; 660 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 661 } 662 if (N >= i) { 663 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 664 } 665 rp[i] = col; 666 bap = ap + bs2*i; 667 if (roworiented) { 668 for (ii=0; ii<bs; ii++,value+=stepval) { 669 for (jj=ii; jj<bs2; jj+=bs) { 670 bap[jj] = *value++; 671 } 672 } 673 } else { 674 for (ii=0; ii<bs; ii++,value+=stepval) { 675 for (jj=0; jj<bs; jj++) { 676 *bap++ = *value++; 677 } 678 } 679 } 680 noinsert2:; 681 low = i; 682 } 683 ailen[row] = nrow; 684 } 685 PetscFunctionReturn(0); 686 } 687 688 #undef __FUNCT__ 689 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 690 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 691 { 692 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 693 PetscErrorCode ierr; 694 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 695 PetscInt m = A->m,*ip,N,*ailen = a->ilen; 696 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 697 MatScalar *aa = a->a,*ap; 698 699 PetscFunctionBegin; 700 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 701 702 if (m) rmax = ailen[0]; 703 for (i=1; i<mbs; i++) { 704 /* move each row back by the amount of empty slots (fshift) before it*/ 705 fshift += imax[i-1] - ailen[i-1]; 706 rmax = PetscMax(rmax,ailen[i]); 707 if (fshift) { 708 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 709 N = ailen[i]; 710 for (j=0; j<N; j++) { 711 ip[j-fshift] = ip[j]; 712 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 713 } 714 } 715 ai[i] = ai[i-1] + ailen[i-1]; 716 } 717 if (mbs) { 718 fshift += imax[mbs-1] - ailen[mbs-1]; 719 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 720 } 721 /* reset ilen and imax for each row */ 722 for (i=0; i<mbs; i++) { 723 ailen[i] = imax[i] = ai[i+1] - ai[i]; 724 } 725 a->nz = ai[mbs]; 726 727 /* diagonals may have moved, reset it */ 728 if (a->diag) { 729 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 730 } 731 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n", 732 m,A->m,A->bs,fshift*bs2,a->nz*bs2); 733 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %D\n", 734 a->reallocs); 735 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %D\n",rmax); 736 a->reallocs = 0; 737 A->info.nz_unneeded = (PetscReal)fshift*bs2; 738 PetscFunctionReturn(0); 739 } 740 741 /* 742 This function returns an array of flags which indicate the locations of contiguous 743 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 744 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 745 Assume: sizes should be long enough to hold all the values. 746 */ 747 #undef __FUNCT__ 748 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 749 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 750 { 751 PetscInt i,j,k,row; 752 PetscTruth flg; 753 754 PetscFunctionBegin; 755 for (i=0,j=0; i<n; j++) { 756 row = idx[i]; 757 if (row%bs!=0) { /* Not the begining of a block */ 758 sizes[j] = 1; 759 i++; 760 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 761 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 762 i++; 763 } else { /* Begining of the block, so check if the complete block exists */ 764 flg = PETSC_TRUE; 765 for (k=1; k<bs; k++) { 766 if (row+k != idx[i+k]) { /* break in the block */ 767 flg = PETSC_FALSE; 768 break; 769 } 770 } 771 if (flg) { /* No break in the bs */ 772 sizes[j] = bs; 773 i+= bs; 774 } else { 775 sizes[j] = 1; 776 i++; 777 } 778 } 779 } 780 *bs_max = j; 781 PetscFunctionReturn(0); 782 } 783 784 785 /* Only add/insert a(i,j) with i<=j (blocks). 786 Any a(i,j) with i>j input by user is ingored. 787 */ 788 789 #undef __FUNCT__ 790 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 791 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 792 { 793 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 794 PetscErrorCode ierr; 795 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 796 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 797 PetscInt *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol; 798 PetscInt ridx,cidx,bs2=a->bs2; 799 MatScalar *ap,value,*aa=a->a,*bap; 800 801 PetscFunctionBegin; 802 for (k=0; k<m; k++) { /* loop over added rows */ 803 row = im[k]; /* row number */ 804 brow = row/bs; /* block row number */ 805 if (row < 0) continue; 806 #if defined(PETSC_USE_DEBUG) 807 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 808 #endif 809 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 810 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 811 rmax = imax[brow]; /* maximum space allocated for this row */ 812 nrow = ailen[brow]; /* actual length of this row */ 813 low = 0; 814 815 for (l=0; l<n; l++) { /* loop over added columns */ 816 if (in[l] < 0) continue; 817 #if defined(PETSC_USE_DEBUG) 818 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->m-1); 819 #endif 820 col = in[l]; 821 bcol = col/bs; /* block col number */ 822 823 if (brow > bcol) continue; /* ignore lower triangular values */ 824 825 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 826 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 827 /* element value a(k,l) */ 828 if (roworiented) { 829 value = v[l + k*n]; 830 } else { 831 value = v[k + l*m]; 832 } 833 834 /* move pointer bap to a(k,l) quickly and add/insert value */ 835 if (col < lastcol) low = 0; high = nrow; 836 lastcol = col; 837 while (high-low > 7) { 838 t = (low+high)/2; 839 if (rp[t] > bcol) high = t; 840 else low = t; 841 } 842 for (i=low; i<high; i++) { 843 if (rp[i] > bcol) break; 844 if (rp[i] == bcol) { 845 bap = ap + bs2*i + bs*cidx + ridx; 846 if (is == ADD_VALUES) *bap += value; 847 else *bap = value; 848 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 849 if (brow == bcol && ridx < cidx){ 850 bap = ap + bs2*i + bs*ridx + cidx; 851 if (is == ADD_VALUES) *bap += value; 852 else *bap = value; 853 } 854 goto noinsert1; 855 } 856 } 857 858 if (nonew == 1) goto noinsert1; 859 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 860 if (nrow >= rmax) { 861 /* there is no extra room in row, therefore enlarge */ 862 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 863 MatScalar *new_a; 864 865 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 866 867 /* Malloc new storage space */ 868 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); 869 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 870 new_j = (PetscInt*)(new_a + bs2*new_nz); 871 new_i = new_j + new_nz; 872 873 /* copy over old data into new slots */ 874 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 875 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 876 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 877 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 878 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 879 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 880 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 881 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 882 /* free up old matrix storage */ 883 ierr = PetscFree(a->a);CHKERRQ(ierr); 884 if (!a->singlemalloc) { 885 ierr = PetscFree(a->i);CHKERRQ(ierr); 886 ierr = PetscFree(a->j);CHKERRQ(ierr); 887 } 888 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 889 a->singlemalloc = PETSC_TRUE; 890 891 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 892 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 893 ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); 894 a->maxnz += bs2*CHUNKSIZE; 895 a->reallocs++; 896 a->nz++; 897 } 898 899 N = nrow++ - 1; 900 /* shift up all the later entries in this row */ 901 for (ii=N; ii>=i; ii--) { 902 rp[ii+1] = rp[ii]; 903 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 904 } 905 if (N>=i) { 906 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 907 } 908 rp[i] = bcol; 909 ap[bs2*i + bs*cidx + ridx] = value; 910 noinsert1:; 911 low = i; 912 } 913 } /* end of loop over added columns */ 914 ailen[brow] = nrow; 915 } /* end of loop over added rows */ 916 PetscFunctionReturn(0); 917 } 918 919 EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*); 920 EXTERN PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*); 921 EXTERN PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat,PetscInt,IS[],PetscInt); 922 EXTERN PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,PetscInt,MatReuse,Mat*); 923 EXTERN PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat*[]); 924 EXTERN PetscErrorCode MatScale_SeqSBAIJ(const PetscScalar*,Mat); 925 EXTERN PetscErrorCode MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 926 EXTERN PetscErrorCode MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 927 EXTERN PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat,Vec); 928 EXTERN PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 929 EXTERN PetscErrorCode MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 930 EXTERN PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat); 931 EXTERN PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat,Vec); 932 933 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 934 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 935 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 936 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 937 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 938 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 939 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 940 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 941 942 EXTERN PetscErrorCode MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs); 943 944 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 945 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 946 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 947 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 948 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 949 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 950 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 951 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 952 953 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,MatFactorInfo*,Mat*); 954 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,MatFactorInfo*,Mat*); 955 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,MatFactorInfo*,Mat*); 956 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,MatFactorInfo*,Mat*); 957 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,MatFactorInfo*,Mat*); 958 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,MatFactorInfo*,Mat*); 959 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,MatFactorInfo*,Mat*); 960 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,MatFactorInfo*,Mat*); 961 EXTERN PetscErrorCode MatGetInertia_SeqSBAIJ(Mat,PetscInt*,PetscInt*,PetscInt*); 962 963 EXTERN PetscErrorCode MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 964 EXTERN PetscErrorCode MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 965 EXTERN PetscErrorCode MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 966 EXTERN PetscErrorCode MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 967 EXTERN PetscErrorCode MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 968 EXTERN PetscErrorCode MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 969 EXTERN PetscErrorCode MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 970 EXTERN PetscErrorCode MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 971 972 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 973 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 974 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 975 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 976 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 977 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 978 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 979 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 980 981 #ifdef HAVE_MatICCFactor 982 /* modified from MatILUFactor_SeqSBAIJ, needs further work! */ 983 #undef __FUNCT__ 984 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 985 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 986 { 987 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 988 Mat outA; 989 PetscErrorCode ierr; 990 PetscTruth row_identity,col_identity; 991 992 PetscFunctionBegin; 993 outA = inA; 994 inA->factor = FACTOR_CHOLESKY; 995 996 if (!a->diag) { 997 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 998 } 999 /* 1000 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1001 for ILU(0) factorization with natural ordering 1002 */ 1003 switch (a->bs) { 1004 case 1: 1005 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1006 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1007 inA->ops->solves = MatSolves_SeqSBAIJ_1; 1008 PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1009 case 2: 1010 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1011 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1012 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1013 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1014 break; 1015 case 3: 1016 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1017 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1018 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1019 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1020 break; 1021 case 4: 1022 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1023 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1024 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1025 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1026 break; 1027 case 5: 1028 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1029 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1030 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1031 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1032 break; 1033 case 6: 1034 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1035 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1036 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1037 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1038 break; 1039 case 7: 1040 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1041 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1042 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1043 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1044 break; 1045 default: 1046 a->row = row; 1047 a->icol = col; 1048 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1049 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1050 1051 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1052 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1053 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 1054 1055 if (!a->solve_work) { 1056 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1057 ierr = PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1058 } 1059 } 1060 1061 ierr = MatCholeskyFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1062 PetscFunctionReturn(0); 1063 } 1064 #endif 1065 1066 #undef __FUNCT__ 1067 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1068 PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat A) 1069 { 1070 static PetscTruth called = PETSC_FALSE; 1071 MPI_Comm comm = A->comm; 1072 PetscErrorCode ierr; 1073 1074 PetscFunctionBegin; 1075 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1076 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1077 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1078 PetscFunctionReturn(0); 1079 } 1080 1081 EXTERN_C_BEGIN 1082 #undef __FUNCT__ 1083 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1084 PetscErrorCode MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices) 1085 { 1086 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1087 PetscInt i,nz,n; 1088 1089 PetscFunctionBegin; 1090 nz = baij->maxnz; 1091 n = mat->n; 1092 for (i=0; i<nz; i++) { 1093 baij->j[i] = indices[i]; 1094 } 1095 baij->nz = nz; 1096 for (i=0; i<n; i++) { 1097 baij->ilen[i] = baij->imax[i]; 1098 } 1099 PetscFunctionReturn(0); 1100 } 1101 EXTERN_C_END 1102 1103 #undef __FUNCT__ 1104 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1105 /*@ 1106 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1107 in the matrix. 1108 1109 Input Parameters: 1110 + mat - the SeqSBAIJ matrix 1111 - indices - the column indices 1112 1113 Level: advanced 1114 1115 Notes: 1116 This can be called if you have precomputed the nonzero structure of the 1117 matrix and want to provide it to the matrix object to improve the performance 1118 of the MatSetValues() operation. 1119 1120 You MUST have set the correct numbers of nonzeros per row in the call to 1121 MatCreateSeqSBAIJ(), and the columns indices MUST be sorted. 1122 1123 MUST be called before any calls to MatSetValues() 1124 1125 .seealso: MatCreateSeqSBAIJ 1126 @*/ 1127 PetscErrorCode MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1128 { 1129 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1130 1131 PetscFunctionBegin; 1132 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1133 PetscValidPointer(indices,2); 1134 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1135 if (f) { 1136 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1137 } else { 1138 SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices"); 1139 } 1140 PetscFunctionReturn(0); 1141 } 1142 1143 #undef __FUNCT__ 1144 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1145 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A) 1146 { 1147 PetscErrorCode ierr; 1148 1149 PetscFunctionBegin; 1150 ierr = MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1151 PetscFunctionReturn(0); 1152 } 1153 1154 #undef __FUNCT__ 1155 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1156 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1157 { 1158 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1159 PetscFunctionBegin; 1160 *array = a->a; 1161 PetscFunctionReturn(0); 1162 } 1163 1164 #undef __FUNCT__ 1165 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1166 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1167 { 1168 PetscFunctionBegin; 1169 PetscFunctionReturn(0); 1170 } 1171 1172 #include "petscblaslapack.h" 1173 #undef __FUNCT__ 1174 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1175 PetscErrorCode MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1176 { 1177 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1178 PetscErrorCode ierr; 1179 PetscInt i,bs=Y->bs,bs2,j; 1180 PetscBLASInt bnz = (PetscBLASInt)x->nz,one = 1; 1181 1182 PetscFunctionBegin; 1183 if (str == SAME_NONZERO_PATTERN) { 1184 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1185 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1186 if (y->xtoy && y->XtoY != X) { 1187 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1188 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1189 } 1190 if (!y->xtoy) { /* get xtoy */ 1191 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1192 y->XtoY = X; 1193 } 1194 bs2 = bs*bs; 1195 for (i=0; i<x->nz; i++) { 1196 j = 0; 1197 while (j < bs2){ 1198 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1199 j++; 1200 } 1201 } 1202 PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)); 1203 } else { 1204 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1205 } 1206 PetscFunctionReturn(0); 1207 } 1208 1209 #undef __FUNCT__ 1210 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1211 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1212 { 1213 PetscFunctionBegin; 1214 *flg = PETSC_TRUE; 1215 PetscFunctionReturn(0); 1216 } 1217 1218 #undef __FUNCT__ 1219 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1220 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1221 { 1222 PetscFunctionBegin; 1223 *flg = PETSC_TRUE; 1224 PetscFunctionReturn(0); 1225 } 1226 1227 #undef __FUNCT__ 1228 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1229 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg) 1230 { 1231 PetscFunctionBegin; 1232 *flg = PETSC_FALSE; 1233 PetscFunctionReturn(0); 1234 } 1235 1236 /* -------------------------------------------------------------------*/ 1237 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1238 MatGetRow_SeqSBAIJ, 1239 MatRestoreRow_SeqSBAIJ, 1240 MatMult_SeqSBAIJ_N, 1241 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1242 MatMult_SeqSBAIJ_N, 1243 MatMultAdd_SeqSBAIJ_N, 1244 MatSolve_SeqSBAIJ_N, 1245 0, 1246 0, 1247 /*10*/ 0, 1248 0, 1249 MatCholeskyFactor_SeqSBAIJ, 1250 MatRelax_SeqSBAIJ, 1251 MatTranspose_SeqSBAIJ, 1252 /*15*/ MatGetInfo_SeqSBAIJ, 1253 MatEqual_SeqSBAIJ, 1254 MatGetDiagonal_SeqSBAIJ, 1255 MatDiagonalScale_SeqSBAIJ, 1256 MatNorm_SeqSBAIJ, 1257 /*20*/ 0, 1258 MatAssemblyEnd_SeqSBAIJ, 1259 0, 1260 MatSetOption_SeqSBAIJ, 1261 MatZeroEntries_SeqSBAIJ, 1262 /*25*/ 0, 1263 0, 1264 0, 1265 MatCholeskyFactorSymbolic_SeqSBAIJ, 1266 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1267 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1268 0, 1269 MatICCFactorSymbolic_SeqSBAIJ, 1270 MatGetArray_SeqSBAIJ, 1271 MatRestoreArray_SeqSBAIJ, 1272 /*35*/ MatDuplicate_SeqSBAIJ, 1273 0, 1274 0, 1275 0, 1276 0, 1277 /*40*/ MatAXPY_SeqSBAIJ, 1278 MatGetSubMatrices_SeqSBAIJ, 1279 MatIncreaseOverlap_SeqSBAIJ, 1280 MatGetValues_SeqSBAIJ, 1281 0, 1282 /*45*/ MatPrintHelp_SeqSBAIJ, 1283 MatScale_SeqSBAIJ, 1284 0, 1285 0, 1286 0, 1287 /*50*/ 0, 1288 MatGetRowIJ_SeqSBAIJ, 1289 MatRestoreRowIJ_SeqSBAIJ, 1290 0, 1291 0, 1292 /*55*/ 0, 1293 0, 1294 0, 1295 0, 1296 MatSetValuesBlocked_SeqSBAIJ, 1297 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1298 0, 1299 0, 1300 MatGetPetscMaps_Petsc, 1301 0, 1302 /*65*/ 0, 1303 0, 1304 0, 1305 0, 1306 0, 1307 /*70*/ MatGetRowMax_SeqSBAIJ, 1308 0, 1309 0, 1310 0, 1311 0, 1312 /*75*/ 0, 1313 0, 1314 0, 1315 0, 1316 0, 1317 /*80*/ 0, 1318 0, 1319 0, 1320 #if !defined(PETSC_USE_COMPLEX) 1321 MatGetInertia_SeqSBAIJ, 1322 #else 1323 0, 1324 #endif 1325 MatLoad_SeqSBAIJ, 1326 /*85*/ MatIsSymmetric_SeqSBAIJ, 1327 MatIsHermitian_SeqSBAIJ, 1328 MatIsStructurallySymmetric_SeqSBAIJ, 1329 0, 1330 0, 1331 /*90*/ 0, 1332 0, 1333 0, 1334 0, 1335 0, 1336 /*95*/ 0, 1337 0, 1338 0, 1339 0}; 1340 1341 EXTERN_C_BEGIN 1342 #undef __FUNCT__ 1343 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1344 PetscErrorCode MatStoreValues_SeqSBAIJ(Mat mat) 1345 { 1346 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1347 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 1348 PetscErrorCode ierr; 1349 1350 PetscFunctionBegin; 1351 if (aij->nonew != 1) { 1352 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1353 } 1354 1355 /* allocate space for values if not already there */ 1356 if (!aij->saved_values) { 1357 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1358 } 1359 1360 /* copy values over */ 1361 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1362 PetscFunctionReturn(0); 1363 } 1364 EXTERN_C_END 1365 1366 EXTERN_C_BEGIN 1367 #undef __FUNCT__ 1368 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1369 PetscErrorCode MatRetrieveValues_SeqSBAIJ(Mat mat) 1370 { 1371 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1372 PetscErrorCode ierr; 1373 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 1374 1375 PetscFunctionBegin; 1376 if (aij->nonew != 1) { 1377 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1378 } 1379 if (!aij->saved_values) { 1380 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1381 } 1382 1383 /* copy values over */ 1384 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1385 PetscFunctionReturn(0); 1386 } 1387 EXTERN_C_END 1388 1389 EXTERN_C_BEGIN 1390 #undef __FUNCT__ 1391 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1392 PetscErrorCode MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 1393 { 1394 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1395 PetscErrorCode ierr; 1396 PetscInt i,len,mbs,bs2; 1397 PetscTruth flg; 1398 1399 PetscFunctionBegin; 1400 B->preallocated = PETSC_TRUE; 1401 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1402 mbs = B->m/bs; 1403 bs2 = bs*bs; 1404 1405 if (mbs*bs != B->m) { 1406 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1407 } 1408 1409 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1410 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 1411 if (nnz) { 1412 for (i=0; i<mbs; i++) { 1413 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 1414 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs); 1415 } 1416 } 1417 1418 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1419 if (!flg) { 1420 switch (bs) { 1421 case 1: 1422 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1423 B->ops->solve = MatSolve_SeqSBAIJ_1; 1424 B->ops->solves = MatSolves_SeqSBAIJ_1; 1425 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1426 B->ops->mult = MatMult_SeqSBAIJ_1; 1427 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1428 break; 1429 case 2: 1430 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1431 B->ops->solve = MatSolve_SeqSBAIJ_2; 1432 B->ops->solvetranspose = MatSolve_SeqSBAIJ_2; 1433 B->ops->mult = MatMult_SeqSBAIJ_2; 1434 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1435 break; 1436 case 3: 1437 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1438 B->ops->solve = MatSolve_SeqSBAIJ_3; 1439 B->ops->solvetranspose = MatSolve_SeqSBAIJ_3; 1440 B->ops->mult = MatMult_SeqSBAIJ_3; 1441 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1442 break; 1443 case 4: 1444 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1445 B->ops->solve = MatSolve_SeqSBAIJ_4; 1446 B->ops->solvetranspose = MatSolve_SeqSBAIJ_4; 1447 B->ops->mult = MatMult_SeqSBAIJ_4; 1448 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1449 break; 1450 case 5: 1451 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1452 B->ops->solve = MatSolve_SeqSBAIJ_5; 1453 B->ops->solvetranspose = MatSolve_SeqSBAIJ_5; 1454 B->ops->mult = MatMult_SeqSBAIJ_5; 1455 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1456 break; 1457 case 6: 1458 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1459 B->ops->solve = MatSolve_SeqSBAIJ_6; 1460 B->ops->solvetranspose = MatSolve_SeqSBAIJ_6; 1461 B->ops->mult = MatMult_SeqSBAIJ_6; 1462 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1463 break; 1464 case 7: 1465 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1466 B->ops->solve = MatSolve_SeqSBAIJ_7; 1467 B->ops->solvetranspose = MatSolve_SeqSBAIJ_7; 1468 B->ops->mult = MatMult_SeqSBAIJ_7; 1469 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1470 break; 1471 } 1472 } 1473 1474 b->mbs = mbs; 1475 b->nbs = mbs; 1476 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->imax);CHKERRQ(ierr); 1477 if (!nnz) { 1478 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1479 else if (nz <= 0) nz = 1; 1480 for (i=0; i<mbs; i++) { 1481 b->imax[i] = nz; 1482 } 1483 nz = nz*mbs; /* total nz */ 1484 } else { 1485 nz = 0; 1486 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1487 } 1488 /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1489 1490 /* allocate the matrix space */ 1491 len = nz*sizeof(PetscInt) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(PetscInt); 1492 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1493 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1494 b->j = (PetscInt*)(b->a + nz*bs2); 1495 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1496 b->i = b->j + nz; 1497 b->singlemalloc = PETSC_TRUE; 1498 1499 /* pointer to beginning of each row */ 1500 b->i[0] = 0; 1501 for (i=1; i<mbs+1; i++) { 1502 b->i[i] = b->i[i-1] + b->imax[i-1]; 1503 } 1504 1505 /* b->ilen will count nonzeros in each block row so far. */ 1506 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->ilen);CHKERRQ(ierr); 1507 ierr = PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1508 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1509 1510 B->bs = bs; 1511 b->bs2 = bs2; 1512 b->nz = 0; 1513 b->maxnz = nz*bs2; 1514 1515 b->inew = 0; 1516 b->jnew = 0; 1517 b->anew = 0; 1518 b->a2anew = 0; 1519 b->permute = PETSC_FALSE; 1520 PetscFunctionReturn(0); 1521 } 1522 EXTERN_C_END 1523 1524 EXTERN_C_BEGIN 1525 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqAIJ(Mat,const MatType,Mat*); 1526 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqBAIJ(Mat,const MatType,Mat*); 1527 EXTERN_C_END 1528 1529 /*MC 1530 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1531 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1532 1533 Options Database Keys: 1534 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1535 1536 Level: beginner 1537 1538 .seealso: MatCreateSeqSBAIJ 1539 M*/ 1540 1541 EXTERN_C_BEGIN 1542 #undef __FUNCT__ 1543 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1544 PetscErrorCode MatCreate_SeqSBAIJ(Mat B) 1545 { 1546 Mat_SeqSBAIJ *b; 1547 PetscErrorCode ierr; 1548 PetscMPIInt size; 1549 1550 PetscFunctionBegin; 1551 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1552 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1553 B->m = B->M = PetscMax(B->m,B->M); 1554 B->n = B->N = PetscMax(B->n,B->N); 1555 1556 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1557 B->data = (void*)b; 1558 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1559 B->ops->destroy = MatDestroy_SeqSBAIJ; 1560 B->ops->view = MatView_SeqSBAIJ; 1561 B->factor = 0; 1562 B->lupivotthreshold = 1.0; 1563 B->mapping = 0; 1564 b->row = 0; 1565 b->icol = 0; 1566 b->reallocs = 0; 1567 b->saved_values = 0; 1568 1569 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1570 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1571 1572 b->sorted = PETSC_FALSE; 1573 b->roworiented = PETSC_TRUE; 1574 b->nonew = 0; 1575 b->diag = 0; 1576 b->solve_work = 0; 1577 b->mult_work = 0; 1578 B->spptr = 0; 1579 b->keepzeroedrows = PETSC_FALSE; 1580 b->xtoy = 0; 1581 b->XtoY = 0; 1582 1583 b->inew = 0; 1584 b->jnew = 0; 1585 b->anew = 0; 1586 b->a2anew = 0; 1587 b->permute = PETSC_FALSE; 1588 1589 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1590 "MatStoreValues_SeqSBAIJ", 1591 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1592 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1593 "MatRetrieveValues_SeqSBAIJ", 1594 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1595 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1596 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1597 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1598 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C", 1599 "MatConvert_SeqSBAIJ_SeqAIJ", 1600 MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr); 1601 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C", 1602 "MatConvert_SeqSBAIJ_SeqBAIJ", 1603 MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr); 1604 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1605 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1606 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1607 1608 B->symmetric = PETSC_TRUE; 1609 B->structurally_symmetric = PETSC_TRUE; 1610 B->symmetric_set = PETSC_TRUE; 1611 B->structurally_symmetric_set = PETSC_TRUE; 1612 PetscFunctionReturn(0); 1613 } 1614 EXTERN_C_END 1615 1616 #undef __FUNCT__ 1617 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1618 /*@C 1619 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1620 compressed row) format. For good matrix assembly performance the 1621 user should preallocate the matrix storage by setting the parameter nz 1622 (or the array nnz). By setting these parameters accurately, performance 1623 during matrix assembly can be increased by more than a factor of 50. 1624 1625 Collective on Mat 1626 1627 Input Parameters: 1628 + A - the symmetric matrix 1629 . bs - size of block 1630 . nz - number of block nonzeros per block row (same for all rows) 1631 - nnz - array containing the number of block nonzeros in the upper triangular plus 1632 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1633 1634 Options Database Keys: 1635 . -mat_no_unroll - uses code that does not unroll the loops in the 1636 block calculations (much slower) 1637 . -mat_block_size - size of the blocks to use 1638 1639 Level: intermediate 1640 1641 Notes: 1642 Specify the preallocated storage with either nz or nnz (not both). 1643 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1644 allocation. For additional details, see the users manual chapter on 1645 matrices. 1646 1647 If the nnz parameter is given then the nz parameter is ignored 1648 1649 1650 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1651 @*/ 1652 PetscErrorCode MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 1653 { 1654 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 1655 1656 PetscFunctionBegin; 1657 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1658 if (f) { 1659 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1660 } 1661 PetscFunctionReturn(0); 1662 } 1663 1664 #undef __FUNCT__ 1665 #define __FUNCT__ "MatCreateSeqSBAIJ" 1666 /*@C 1667 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1668 compressed row) format. For good matrix assembly performance the 1669 user should preallocate the matrix storage by setting the parameter nz 1670 (or the array nnz). By setting these parameters accurately, performance 1671 during matrix assembly can be increased by more than a factor of 50. 1672 1673 Collective on MPI_Comm 1674 1675 Input Parameters: 1676 + comm - MPI communicator, set to PETSC_COMM_SELF 1677 . bs - size of block 1678 . m - number of rows, or number of columns 1679 . nz - number of block nonzeros per block row (same for all rows) 1680 - nnz - array containing the number of block nonzeros in the upper triangular plus 1681 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1682 1683 Output Parameter: 1684 . A - the symmetric matrix 1685 1686 Options Database Keys: 1687 . -mat_no_unroll - uses code that does not unroll the loops in the 1688 block calculations (much slower) 1689 . -mat_block_size - size of the blocks to use 1690 1691 Level: intermediate 1692 1693 Notes: 1694 1695 Specify the preallocated storage with either nz or nnz (not both). 1696 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1697 allocation. For additional details, see the users manual chapter on 1698 matrices. 1699 1700 If the nnz parameter is given then the nz parameter is ignored 1701 1702 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1703 @*/ 1704 PetscErrorCode MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1705 { 1706 PetscErrorCode ierr; 1707 1708 PetscFunctionBegin; 1709 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1710 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1711 ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1712 PetscFunctionReturn(0); 1713 } 1714 1715 #undef __FUNCT__ 1716 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1717 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1718 { 1719 Mat C; 1720 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1721 PetscErrorCode ierr; 1722 PetscInt i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1723 1724 PetscFunctionBegin; 1725 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1726 1727 *B = 0; 1728 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1729 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1730 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1731 c = (Mat_SeqSBAIJ*)C->data; 1732 1733 C->preallocated = PETSC_TRUE; 1734 C->factor = A->factor; 1735 c->row = 0; 1736 c->icol = 0; 1737 c->saved_values = 0; 1738 c->keepzeroedrows = a->keepzeroedrows; 1739 C->assembled = PETSC_TRUE; 1740 1741 C->M = A->M; 1742 C->N = A->N; 1743 C->bs = A->bs; 1744 c->bs2 = a->bs2; 1745 c->mbs = a->mbs; 1746 c->nbs = a->nbs; 1747 1748 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr); 1749 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr); 1750 for (i=0; i<mbs; i++) { 1751 c->imax[i] = a->imax[i]; 1752 c->ilen[i] = a->ilen[i]; 1753 } 1754 1755 /* allocate the matrix space */ 1756 c->singlemalloc = PETSC_TRUE; 1757 len = (mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)); 1758 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1759 c->j = (PetscInt*)(c->a + nz*bs2); 1760 c->i = c->j + nz; 1761 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1762 if (mbs > 0) { 1763 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1764 if (cpvalues == MAT_COPY_VALUES) { 1765 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1766 } else { 1767 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1768 } 1769 } 1770 1771 ierr = PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1772 c->sorted = a->sorted; 1773 c->roworiented = a->roworiented; 1774 c->nonew = a->nonew; 1775 1776 if (a->diag) { 1777 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 1778 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1779 for (i=0; i<mbs; i++) { 1780 c->diag[i] = a->diag[i]; 1781 } 1782 } else c->diag = 0; 1783 c->nz = a->nz; 1784 c->maxnz = a->maxnz; 1785 c->solve_work = 0; 1786 c->mult_work = 0; 1787 *B = C; 1788 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1789 PetscFunctionReturn(0); 1790 } 1791 1792 #undef __FUNCT__ 1793 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1794 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer,const MatType type,Mat *A) 1795 { 1796 Mat_SeqSBAIJ *a; 1797 Mat B; 1798 PetscErrorCode ierr; 1799 int fd; 1800 PetscMPIInt size; 1801 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 1802 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1803 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 1804 PetscInt *masked,nmask,tmp,bs2,ishift; 1805 PetscScalar *aa; 1806 MPI_Comm comm = ((PetscObject)viewer)->comm; 1807 1808 PetscFunctionBegin; 1809 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1810 bs2 = bs*bs; 1811 1812 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1813 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1814 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1815 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1816 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1817 M = header[1]; N = header[2]; nz = header[3]; 1818 1819 if (header[3] < 0) { 1820 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1821 } 1822 1823 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1824 1825 /* 1826 This code adds extra rows to make sure the number of rows is 1827 divisible by the blocksize 1828 */ 1829 mbs = M/bs; 1830 extra_rows = bs - M + bs*(mbs); 1831 if (extra_rows == bs) extra_rows = 0; 1832 else mbs++; 1833 if (extra_rows) { 1834 PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"); 1835 } 1836 1837 /* read in row lengths */ 1838 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1839 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1840 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1841 1842 /* read in column indices */ 1843 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1844 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1845 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1846 1847 /* loop over row lengths determining block row lengths */ 1848 ierr = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr); 1849 ierr = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1850 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 1851 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1852 masked = mask + mbs; 1853 rowcount = 0; nzcount = 0; 1854 for (i=0; i<mbs; i++) { 1855 nmask = 0; 1856 for (j=0; j<bs; j++) { 1857 kmax = rowlengths[rowcount]; 1858 for (k=0; k<kmax; k++) { 1859 tmp = jj[nzcount++]/bs; /* block col. index */ 1860 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1861 } 1862 rowcount++; 1863 } 1864 s_browlengths[i] += nmask; 1865 1866 /* zero out the mask elements we set */ 1867 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1868 } 1869 1870 /* create our matrix */ 1871 ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr); 1872 ierr = MatSetType(B,type);CHKERRQ(ierr); 1873 ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr); 1874 a = (Mat_SeqSBAIJ*)B->data; 1875 1876 /* set matrix "i" values */ 1877 a->i[0] = 0; 1878 for (i=1; i<= mbs; i++) { 1879 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1880 a->ilen[i-1] = s_browlengths[i-1]; 1881 } 1882 a->nz = a->i[mbs]; 1883 1884 /* read in nonzero values */ 1885 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1886 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1887 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1888 1889 /* set "a" and "j" values into matrix */ 1890 nzcount = 0; jcount = 0; 1891 for (i=0; i<mbs; i++) { 1892 nzcountb = nzcount; 1893 nmask = 0; 1894 for (j=0; j<bs; j++) { 1895 kmax = rowlengths[i*bs+j]; 1896 for (k=0; k<kmax; k++) { 1897 tmp = jj[nzcount++]/bs; /* block col. index */ 1898 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1899 } 1900 } 1901 /* sort the masked values */ 1902 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1903 1904 /* set "j" values into matrix */ 1905 maskcount = 1; 1906 for (j=0; j<nmask; j++) { 1907 a->j[jcount++] = masked[j]; 1908 mask[masked[j]] = maskcount++; 1909 } 1910 1911 /* set "a" values into matrix */ 1912 ishift = bs2*a->i[i]; 1913 for (j=0; j<bs; j++) { 1914 kmax = rowlengths[i*bs+j]; 1915 for (k=0; k<kmax; k++) { 1916 tmp = jj[nzcountb]/bs ; /* block col. index */ 1917 if (tmp >= i){ 1918 block = mask[tmp] - 1; 1919 point = jj[nzcountb] - bs*tmp; 1920 idx = ishift + bs2*block + j + bs*point; 1921 a->a[idx] = aa[nzcountb]; 1922 } 1923 nzcountb++; 1924 } 1925 } 1926 /* zero out the mask elements we set */ 1927 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1928 } 1929 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1930 1931 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1932 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1933 ierr = PetscFree(aa);CHKERRQ(ierr); 1934 ierr = PetscFree(jj);CHKERRQ(ierr); 1935 ierr = PetscFree(mask);CHKERRQ(ierr); 1936 1937 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1938 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1939 ierr = MatView_Private(B);CHKERRQ(ierr); 1940 *A = B; 1941 PetscFunctionReturn(0); 1942 } 1943 1944 #undef __FUNCT__ 1945 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1946 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1947 { 1948 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1949 MatScalar *aa=a->a,*v,*v1; 1950 PetscScalar *x,*b,*t,sum,d; 1951 PetscErrorCode ierr; 1952 PetscInt m=a->mbs,bs=A->bs,*ai=a->i,*aj=a->j; 1953 PetscInt nz,nz1,*vj,*vj1,i; 1954 1955 PetscFunctionBegin; 1956 its = its*lits; 1957 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1958 1959 if (bs > 1) 1960 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1961 1962 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1963 if (xx != bb) { 1964 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1965 } else { 1966 b = x; 1967 } 1968 1969 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1970 1971 if (flag & SOR_ZERO_INITIAL_GUESS) { 1972 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1973 for (i=0; i<m; i++) 1974 t[i] = b[i]; 1975 1976 for (i=0; i<m; i++){ 1977 d = *(aa + ai[i]); /* diag[i] */ 1978 v = aa + ai[i] + 1; 1979 vj = aj + ai[i] + 1; 1980 nz = ai[i+1] - ai[i] - 1; 1981 x[i] = omega*t[i]/d; 1982 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 1983 PetscLogFlops(2*nz-1); 1984 } 1985 } 1986 1987 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1988 for (i=0; i<m; i++) 1989 t[i] = b[i]; 1990 1991 for (i=0; i<m-1; i++){ /* update rhs */ 1992 v = aa + ai[i] + 1; 1993 vj = aj + ai[i] + 1; 1994 nz = ai[i+1] - ai[i] - 1; 1995 while (nz--) t[*vj++] -= x[i]*(*v++); 1996 PetscLogFlops(2*nz-1); 1997 } 1998 for (i=m-1; i>=0; i--){ 1999 d = *(aa + ai[i]); 2000 v = aa + ai[i] + 1; 2001 vj = aj + ai[i] + 1; 2002 nz = ai[i+1] - ai[i] - 1; 2003 sum = t[i]; 2004 while (nz--) sum -= x[*vj++]*(*v++); 2005 PetscLogFlops(2*nz-1); 2006 x[i] = (1-omega)*x[i] + omega*sum/d; 2007 } 2008 } 2009 its--; 2010 } 2011 2012 while (its--) { 2013 /* 2014 forward sweep: 2015 for i=0,...,m-1: 2016 sum[i] = (b[i] - U(i,:)x )/d[i]; 2017 x[i] = (1-omega)x[i] + omega*sum[i]; 2018 b = b - x[i]*U^T(i,:); 2019 2020 */ 2021 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2022 for (i=0; i<m; i++) 2023 t[i] = b[i]; 2024 2025 for (i=0; i<m; i++){ 2026 d = *(aa + ai[i]); /* diag[i] */ 2027 v = aa + ai[i] + 1; v1=v; 2028 vj = aj + ai[i] + 1; vj1=vj; 2029 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2030 sum = t[i]; 2031 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2032 x[i] = (1-omega)*x[i] + omega*sum/d; 2033 while (nz--) t[*vj++] -= x[i]*(*v++); 2034 PetscLogFlops(4*nz-2); 2035 } 2036 } 2037 2038 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2039 /* 2040 backward sweep: 2041 b = b - x[i]*U^T(i,:), i=0,...,n-2 2042 for i=m-1,...,0: 2043 sum[i] = (b[i] - U(i,:)x )/d[i]; 2044 x[i] = (1-omega)x[i] + omega*sum[i]; 2045 */ 2046 for (i=0; i<m; i++) 2047 t[i] = b[i]; 2048 2049 for (i=0; i<m-1; i++){ /* update rhs */ 2050 v = aa + ai[i] + 1; 2051 vj = aj + ai[i] + 1; 2052 nz = ai[i+1] - ai[i] - 1; 2053 while (nz--) t[*vj++] -= x[i]*(*v++); 2054 PetscLogFlops(2*nz-1); 2055 } 2056 for (i=m-1; i>=0; i--){ 2057 d = *(aa + ai[i]); 2058 v = aa + ai[i] + 1; 2059 vj = aj + ai[i] + 1; 2060 nz = ai[i+1] - ai[i] - 1; 2061 sum = t[i]; 2062 while (nz--) sum -= x[*vj++]*(*v++); 2063 PetscLogFlops(2*nz-1); 2064 x[i] = (1-omega)*x[i] + omega*sum/d; 2065 } 2066 } 2067 } 2068 2069 ierr = PetscFree(t);CHKERRQ(ierr); 2070 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2071 if (bb != xx) { 2072 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2073 } 2074 PetscFunctionReturn(0); 2075 } 2076 2077 2078 2079 2080 2081 2082