1 2 /* 3 Defines the basic matrix operations for the SBAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 7 #include "src/inline/spops.h" 8 #include "src/mat/impls/sbaij/seq/sbaij.h" 9 10 #define CHUNKSIZE 10 11 12 /* 13 Checks for missing diagonals 14 */ 15 #undef __FUNCT__ 16 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 17 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A) 18 { 19 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 20 PetscErrorCode ierr; 21 PetscInt *diag,*jj = a->j,i; 22 23 PetscFunctionBegin; 24 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 25 diag = a->diag; 26 for (i=0; i<a->mbs; i++) { 27 if (jj[diag[i]] != i) SETERRQ1(PETSC_ERR_ARG_CORRUPT,"Matrix is missing diagonal number %D",i); 28 } 29 PetscFunctionReturn(0); 30 } 31 32 #undef __FUNCT__ 33 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 34 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A) 35 { 36 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 37 PetscErrorCode ierr; 38 PetscInt i,mbs = a->mbs; 39 40 PetscFunctionBegin; 41 if (a->diag) PetscFunctionReturn(0); 42 43 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 44 PetscLogObjectMemory(A,(mbs+1)*sizeof(PetscInt)); 45 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 46 PetscFunctionReturn(0); 47 } 48 49 #undef __FUNCT__ 50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 51 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 52 { 53 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 54 PetscInt n = a->mbs,i; 55 56 PetscFunctionBegin; 57 *nn = n; 58 if (!ia) PetscFunctionReturn(0); 59 60 if (oshift == 1) { 61 /* temporarily add 1 to i and j indices */ 62 PetscInt nz = a->i[n]; 63 for (i=0; i<nz; i++) a->j[i]++; 64 for (i=0; i<n+1; i++) a->i[i]++; 65 *ia = a->i; *ja = a->j; 66 } else { 67 *ia = a->i; *ja = a->j; 68 } 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 74 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 75 { 76 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 77 PetscInt i,n = a->mbs; 78 79 PetscFunctionBegin; 80 if (!ia) PetscFunctionReturn(0); 81 82 if (oshift == 1) { 83 PetscInt nz = a->i[n]-1; 84 for (i=0; i<nz; i++) a->j[i]--; 85 for (i=0; i<n+1; i++) a->i[i]--; 86 } 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 92 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A) 93 { 94 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 95 PetscErrorCode ierr; 96 97 PetscFunctionBegin; 98 PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->m,a->nz); 99 ierr = PetscFree(a->a);CHKERRQ(ierr); 100 if (!a->singlemalloc) { 101 ierr = PetscFree(a->i);CHKERRQ(ierr); 102 ierr = PetscFree(a->j);CHKERRQ(ierr); 103 } 104 if (a->row) { 105 ierr = ISDestroy(a->row);CHKERRQ(ierr); 106 } 107 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 108 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 109 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 110 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 111 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 112 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 113 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 114 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 115 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 116 117 if (a->inew){ 118 ierr = PetscFree(a->inew);CHKERRQ(ierr); 119 a->inew = 0; 120 } 121 ierr = PetscFree(a);CHKERRQ(ierr); 122 123 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 124 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 125 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 126 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 127 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr); 128 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 129 PetscFunctionReturn(0); 130 } 131 132 #undef __FUNCT__ 133 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 134 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op) 135 { 136 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 137 138 PetscFunctionBegin; 139 switch (op) { 140 case MAT_ROW_ORIENTED: 141 a->roworiented = PETSC_TRUE; 142 break; 143 case MAT_COLUMN_ORIENTED: 144 a->roworiented = PETSC_FALSE; 145 break; 146 case MAT_COLUMNS_SORTED: 147 a->sorted = PETSC_TRUE; 148 break; 149 case MAT_COLUMNS_UNSORTED: 150 a->sorted = PETSC_FALSE; 151 break; 152 case MAT_KEEP_ZEROED_ROWS: 153 a->keepzeroedrows = PETSC_TRUE; 154 break; 155 case MAT_NO_NEW_NONZERO_LOCATIONS: 156 a->nonew = 1; 157 break; 158 case MAT_NEW_NONZERO_LOCATION_ERR: 159 a->nonew = -1; 160 break; 161 case MAT_NEW_NONZERO_ALLOCATION_ERR: 162 a->nonew = -2; 163 break; 164 case MAT_YES_NEW_NONZERO_LOCATIONS: 165 a->nonew = 0; 166 break; 167 case MAT_ROWS_SORTED: 168 case MAT_ROWS_UNSORTED: 169 case MAT_YES_NEW_DIAGONALS: 170 case MAT_IGNORE_OFF_PROC_ENTRIES: 171 case MAT_USE_HASH_TABLE: 172 PetscLogInfo(A,"MatSetOption_SeqSBAIJ:Option ignored\n"); 173 break; 174 case MAT_NO_NEW_DIAGONALS: 175 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 176 case MAT_NOT_SYMMETRIC: 177 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 178 case MAT_HERMITIAN: 179 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 180 case MAT_SYMMETRIC: 181 case MAT_STRUCTURALLY_SYMMETRIC: 182 case MAT_NOT_HERMITIAN: 183 case MAT_SYMMETRY_ETERNAL: 184 case MAT_NOT_SYMMETRY_ETERNAL: 185 break; 186 default: 187 SETERRQ(PETSC_ERR_SUP,"unknown option"); 188 } 189 PetscFunctionReturn(0); 190 } 191 192 #undef __FUNCT__ 193 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 194 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v) 195 { 196 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 197 PetscErrorCode ierr; 198 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2; 199 MatScalar *aa,*aa_i; 200 PetscScalar *v_i; 201 202 PetscFunctionBegin; 203 bs = A->bs; 204 ai = a->i; 205 aj = a->j; 206 aa = a->a; 207 bs2 = a->bs2; 208 209 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row); 210 211 bn = row/bs; /* Block number */ 212 bp = row % bs; /* Block position */ 213 M = ai[bn+1] - ai[bn]; 214 *ncols = bs*M; 215 216 if (v) { 217 *v = 0; 218 if (*ncols) { 219 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 220 for (i=0; i<M; i++) { /* for each block in the block row */ 221 v_i = *v + i*bs; 222 aa_i = aa + bs2*(ai[bn] + i); 223 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 224 } 225 } 226 } 227 228 if (cols) { 229 *cols = 0; 230 if (*ncols) { 231 ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr); 232 for (i=0; i<M; i++) { /* for each block in the block row */ 233 cols_i = *cols + i*bs; 234 itmp = bs*aj[ai[bn] + i]; 235 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 236 } 237 } 238 } 239 240 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 241 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 242 #ifdef column_search 243 v_i = *v + M*bs; 244 cols_i = *cols + M*bs; 245 for (i=0; i<bn; i++){ /* for each block row */ 246 M = ai[i+1] - ai[i]; 247 for (j=0; j<M; j++){ 248 itmp = aj[ai[i] + j]; /* block column value */ 249 if (itmp == bn){ 250 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 251 for (k=0; k<bs; k++) { 252 *cols_i++ = i*bs+k; 253 *v_i++ = aa_i[k]; 254 } 255 *ncols += bs; 256 break; 257 } 258 } 259 } 260 #endif 261 262 PetscFunctionReturn(0); 263 } 264 265 #undef __FUNCT__ 266 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 267 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 268 { 269 PetscErrorCode ierr; 270 271 PetscFunctionBegin; 272 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 273 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 274 PetscFunctionReturn(0); 275 } 276 277 #undef __FUNCT__ 278 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 279 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B) 280 { 281 PetscErrorCode ierr; 282 PetscFunctionBegin; 283 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 284 PetscFunctionReturn(0); 285 } 286 287 #undef __FUNCT__ 288 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 289 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 290 { 291 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 292 PetscErrorCode ierr; 293 PetscInt i,j,bs = A->bs,k,l,bs2=a->bs2; 294 char *name; 295 PetscViewerFormat format; 296 297 PetscFunctionBegin; 298 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 299 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 300 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 301 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 302 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 303 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 304 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 305 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 306 for (i=0; i<a->mbs; i++) { 307 for (j=0; j<bs; j++) { 308 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 309 for (k=a->i[i]; k<a->i[i+1]; k++) { 310 for (l=0; l<bs; l++) { 311 #if defined(PETSC_USE_COMPLEX) 312 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 313 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 314 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 315 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 316 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 317 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 318 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 319 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 320 } 321 #else 322 if (a->a[bs2*k + l*bs + j] != 0.0) { 323 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 324 } 325 #endif 326 } 327 } 328 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 329 } 330 } 331 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 332 } else { 333 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 334 for (i=0; i<a->mbs; i++) { 335 for (j=0; j<bs; j++) { 336 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 337 for (k=a->i[i]; k<a->i[i+1]; k++) { 338 for (l=0; l<bs; l++) { 339 #if defined(PETSC_USE_COMPLEX) 340 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 341 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 342 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 343 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 344 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 345 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 346 } else { 347 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 348 } 349 #else 350 ierr = PetscViewerASCIIPrintf(viewer," %D %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 351 #endif 352 } 353 } 354 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 355 } 356 } 357 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 358 } 359 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 360 PetscFunctionReturn(0); 361 } 362 363 #undef __FUNCT__ 364 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 365 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 366 { 367 Mat A = (Mat) Aa; 368 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 369 PetscErrorCode ierr; 370 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2; 371 PetscMPIInt rank; 372 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 373 MatScalar *aa; 374 MPI_Comm comm; 375 PetscViewer viewer; 376 377 PetscFunctionBegin; 378 /* 379 This is nasty. If this is called from an originally parallel matrix 380 then all processes call this,but only the first has the matrix so the 381 rest should return immediately. 382 */ 383 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 384 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 385 if (rank) PetscFunctionReturn(0); 386 387 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 388 389 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 390 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 391 392 /* loop over matrix elements drawing boxes */ 393 color = PETSC_DRAW_BLUE; 394 for (i=0,row=0; i<mbs; i++,row+=bs) { 395 for (j=a->i[i]; j<a->i[i+1]; j++) { 396 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 397 x_l = a->j[j]*bs; x_r = x_l + 1.0; 398 aa = a->a + j*bs2; 399 for (k=0; k<bs; k++) { 400 for (l=0; l<bs; l++) { 401 if (PetscRealPart(*aa++) >= 0.) continue; 402 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 403 } 404 } 405 } 406 } 407 color = PETSC_DRAW_CYAN; 408 for (i=0,row=0; i<mbs; i++,row+=bs) { 409 for (j=a->i[i]; j<a->i[i+1]; j++) { 410 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 411 x_l = a->j[j]*bs; x_r = x_l + 1.0; 412 aa = a->a + j*bs2; 413 for (k=0; k<bs; k++) { 414 for (l=0; l<bs; l++) { 415 if (PetscRealPart(*aa++) != 0.) continue; 416 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 417 } 418 } 419 } 420 } 421 422 color = PETSC_DRAW_RED; 423 for (i=0,row=0; i<mbs; i++,row+=bs) { 424 for (j=a->i[i]; j<a->i[i+1]; j++) { 425 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 426 x_l = a->j[j]*bs; x_r = x_l + 1.0; 427 aa = a->a + j*bs2; 428 for (k=0; k<bs; k++) { 429 for (l=0; l<bs; l++) { 430 if (PetscRealPart(*aa++) <= 0.) continue; 431 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 432 } 433 } 434 } 435 } 436 PetscFunctionReturn(0); 437 } 438 439 #undef __FUNCT__ 440 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 441 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 442 { 443 PetscErrorCode ierr; 444 PetscReal xl,yl,xr,yr,w,h; 445 PetscDraw draw; 446 PetscTruth isnull; 447 448 PetscFunctionBegin; 449 450 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 451 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 452 453 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 454 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 455 xr += w; yr += h; xl = -w; yl = -h; 456 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 457 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 458 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 459 PetscFunctionReturn(0); 460 } 461 462 #undef __FUNCT__ 463 #define __FUNCT__ "MatView_SeqSBAIJ" 464 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 465 { 466 PetscErrorCode ierr; 467 PetscTruth iascii,isdraw; 468 469 PetscFunctionBegin; 470 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 471 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 472 if (iascii){ 473 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 474 } else if (isdraw) { 475 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 476 } else { 477 Mat B; 478 ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr); 479 ierr = MatView(B,viewer);CHKERRQ(ierr); 480 ierr = MatDestroy(B);CHKERRQ(ierr); 481 } 482 PetscFunctionReturn(0); 483 } 484 485 486 #undef __FUNCT__ 487 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 488 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 489 { 490 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 491 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 492 PetscInt *ai = a->i,*ailen = a->ilen; 493 PetscInt brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2; 494 MatScalar *ap,*aa = a->a,zero = 0.0; 495 496 PetscFunctionBegin; 497 for (k=0; k<m; k++) { /* loop over rows */ 498 row = im[k]; brow = row/bs; 499 if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row); 500 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 501 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 502 nrow = ailen[brow]; 503 for (l=0; l<n; l++) { /* loop over columns */ 504 if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]); 505 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1); 506 col = in[l] ; 507 bcol = col/bs; 508 cidx = col%bs; 509 ridx = row%bs; 510 high = nrow; 511 low = 0; /* assume unsorted */ 512 while (high-low > 5) { 513 t = (low+high)/2; 514 if (rp[t] > bcol) high = t; 515 else low = t; 516 } 517 for (i=low; i<high; i++) { 518 if (rp[i] > bcol) break; 519 if (rp[i] == bcol) { 520 *v++ = ap[bs2*i+bs*cidx+ridx]; 521 goto finished; 522 } 523 } 524 *v++ = zero; 525 finished:; 526 } 527 } 528 PetscFunctionReturn(0); 529 } 530 531 532 #undef __FUNCT__ 533 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 534 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 535 { 536 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 537 PetscErrorCode ierr; 538 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 539 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 540 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval; 541 PetscTruth roworiented=a->roworiented; 542 const MatScalar *value = v; 543 MatScalar *ap,*aa = a->a,*bap; 544 545 PetscFunctionBegin; 546 if (roworiented) { 547 stepval = (n-1)*bs; 548 } else { 549 stepval = (m-1)*bs; 550 } 551 for (k=0; k<m; k++) { /* loop over added rows */ 552 row = im[k]; 553 if (row < 0) continue; 554 #if defined(PETSC_USE_BOPT_g) 555 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 556 #endif 557 rp = aj + ai[row]; 558 ap = aa + bs2*ai[row]; 559 rmax = imax[row]; 560 nrow = ailen[row]; 561 low = 0; 562 for (l=0; l<n; l++) { /* loop over added columns */ 563 if (in[l] < 0) continue; 564 col = in[l]; 565 #if defined(PETSC_USE_BOPT_g) 566 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1); 567 #endif 568 if (col < row) continue; /* ignore lower triangular block */ 569 if (roworiented) { 570 value = v + k*(stepval+bs)*bs + l*bs; 571 } else { 572 value = v + l*(stepval+bs)*bs + k*bs; 573 } 574 if (!sorted) low = 0; high = nrow; 575 while (high-low > 7) { 576 t = (low+high)/2; 577 if (rp[t] > col) high = t; 578 else low = t; 579 } 580 for (i=low; i<high; i++) { 581 if (rp[i] > col) break; 582 if (rp[i] == col) { 583 bap = ap + bs2*i; 584 if (roworiented) { 585 if (is == ADD_VALUES) { 586 for (ii=0; ii<bs; ii++,value+=stepval) { 587 for (jj=ii; jj<bs2; jj+=bs) { 588 bap[jj] += *value++; 589 } 590 } 591 } else { 592 for (ii=0; ii<bs; ii++,value+=stepval) { 593 for (jj=ii; jj<bs2; jj+=bs) { 594 bap[jj] = *value++; 595 } 596 } 597 } 598 } else { 599 if (is == ADD_VALUES) { 600 for (ii=0; ii<bs; ii++,value+=stepval) { 601 for (jj=0; jj<bs; jj++) { 602 *bap++ += *value++; 603 } 604 } 605 } else { 606 for (ii=0; ii<bs; ii++,value+=stepval) { 607 for (jj=0; jj<bs; jj++) { 608 *bap++ = *value++; 609 } 610 } 611 } 612 } 613 goto noinsert2; 614 } 615 } 616 if (nonew == 1) goto noinsert2; 617 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 618 if (nrow >= rmax) { 619 /* there is no extra room in row, therefore enlarge */ 620 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 621 MatScalar *new_a; 622 623 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 624 625 /* malloc new storage space */ 626 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); 627 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 628 new_j = (PetscInt*)(new_a + bs2*new_nz); 629 new_i = new_j + new_nz; 630 631 /* copy over old data into new slots */ 632 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 633 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 634 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 635 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 636 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 637 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 638 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 639 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 640 /* free up old matrix storage */ 641 ierr = PetscFree(a->a);CHKERRQ(ierr); 642 if (!a->singlemalloc) { 643 ierr = PetscFree(a->i);CHKERRQ(ierr); 644 ierr = PetscFree(a->j);CHKERRQ(ierr); 645 } 646 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 647 a->singlemalloc = PETSC_TRUE; 648 649 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 650 rmax = imax[row] = imax[row] + CHUNKSIZE; 651 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); 652 a->maxnz += bs2*CHUNKSIZE; 653 a->reallocs++; 654 a->nz++; 655 } 656 N = nrow++ - 1; 657 /* shift up all the later entries in this row */ 658 for (ii=N; ii>=i; ii--) { 659 rp[ii+1] = rp[ii]; 660 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 661 } 662 if (N >= i) { 663 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 664 } 665 rp[i] = col; 666 bap = ap + bs2*i; 667 if (roworiented) { 668 for (ii=0; ii<bs; ii++,value+=stepval) { 669 for (jj=ii; jj<bs2; jj+=bs) { 670 bap[jj] = *value++; 671 } 672 } 673 } else { 674 for (ii=0; ii<bs; ii++,value+=stepval) { 675 for (jj=0; jj<bs; jj++) { 676 *bap++ = *value++; 677 } 678 } 679 } 680 noinsert2:; 681 low = i; 682 } 683 ailen[row] = nrow; 684 } 685 PetscFunctionReturn(0); 686 } 687 688 #undef __FUNCT__ 689 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 690 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 691 { 692 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 693 PetscErrorCode ierr; 694 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 695 PetscInt m = A->m,*ip,N,*ailen = a->ilen; 696 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 697 MatScalar *aa = a->a,*ap; 698 699 PetscFunctionBegin; 700 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 701 702 if (m) rmax = ailen[0]; 703 for (i=1; i<mbs; i++) { 704 /* move each row back by the amount of empty slots (fshift) before it*/ 705 fshift += imax[i-1] - ailen[i-1]; 706 rmax = PetscMax(rmax,ailen[i]); 707 if (fshift) { 708 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 709 N = ailen[i]; 710 for (j=0; j<N; j++) { 711 ip[j-fshift] = ip[j]; 712 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 713 } 714 } 715 ai[i] = ai[i-1] + ailen[i-1]; 716 } 717 if (mbs) { 718 fshift += imax[mbs-1] - ailen[mbs-1]; 719 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 720 } 721 /* reset ilen and imax for each row */ 722 for (i=0; i<mbs; i++) { 723 ailen[i] = imax[i] = ai[i+1] - ai[i]; 724 } 725 a->nz = ai[mbs]; 726 727 /* diagonals may have moved, reset it */ 728 if (a->diag) { 729 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 730 } 731 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n", 732 m,A->m,A->bs,fshift*bs2,a->nz*bs2); 733 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %D\n", 734 a->reallocs); 735 PetscLogInfo(A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %D\n",rmax); 736 a->reallocs = 0; 737 A->info.nz_unneeded = (PetscReal)fshift*bs2; 738 739 PetscFunctionReturn(0); 740 } 741 742 /* 743 This function returns an array of flags which indicate the locations of contiguous 744 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 745 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 746 Assume: sizes should be long enough to hold all the values. 747 */ 748 #undef __FUNCT__ 749 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 750 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 751 { 752 PetscInt i,j,k,row; 753 PetscTruth flg; 754 755 PetscFunctionBegin; 756 for (i=0,j=0; i<n; j++) { 757 row = idx[i]; 758 if (row%bs!=0) { /* Not the begining of a block */ 759 sizes[j] = 1; 760 i++; 761 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 762 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 763 i++; 764 } else { /* Begining of the block, so check if the complete block exists */ 765 flg = PETSC_TRUE; 766 for (k=1; k<bs; k++) { 767 if (row+k != idx[i+k]) { /* break in the block */ 768 flg = PETSC_FALSE; 769 break; 770 } 771 } 772 if (flg == PETSC_TRUE) { /* No break in the bs */ 773 sizes[j] = bs; 774 i+= bs; 775 } else { 776 sizes[j] = 1; 777 i++; 778 } 779 } 780 } 781 *bs_max = j; 782 PetscFunctionReturn(0); 783 } 784 785 #undef __FUNCT__ 786 #define __FUNCT__ "MatZeroRows_SeqSBAIJ" 787 PetscErrorCode MatZeroRows_SeqSBAIJ(Mat A,IS is,const PetscScalar *diag) 788 { 789 PetscFunctionBegin; 790 SETERRQ(PETSC_ERR_SUP,"No support for this function yet"); 791 } 792 793 /* Only add/insert a(i,j) with i<=j (blocks). 794 Any a(i,j) with i>j input by user is ingored. 795 */ 796 797 #undef __FUNCT__ 798 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 799 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 800 { 801 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 802 PetscErrorCode ierr; 803 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 804 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 805 PetscInt *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol; 806 PetscInt ridx,cidx,bs2=a->bs2; 807 MatScalar *ap,value,*aa=a->a,*bap; 808 809 PetscFunctionBegin; 810 811 for (k=0; k<m; k++) { /* loop over added rows */ 812 row = im[k]; /* row number */ 813 brow = row/bs; /* block row number */ 814 if (row < 0) continue; 815 #if defined(PETSC_USE_BOPT_g) 816 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 817 #endif 818 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 819 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 820 rmax = imax[brow]; /* maximum space allocated for this row */ 821 nrow = ailen[brow]; /* actual length of this row */ 822 low = 0; 823 824 for (l=0; l<n; l++) { /* loop over added columns */ 825 if (in[l] < 0) continue; 826 #if defined(PETSC_USE_BOPT_g) 827 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->m-1); 828 #endif 829 col = in[l]; 830 bcol = col/bs; /* block col number */ 831 832 if (brow <= bcol){ 833 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 834 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 835 /* element value a(k,l) */ 836 if (roworiented) { 837 value = v[l + k*n]; 838 } else { 839 value = v[k + l*m]; 840 } 841 842 /* move pointer bap to a(k,l) quickly and add/insert value */ 843 if (!sorted) low = 0; high = nrow; 844 while (high-low > 7) { 845 t = (low+high)/2; 846 if (rp[t] > bcol) high = t; 847 else low = t; 848 } 849 for (i=low; i<high; i++) { 850 /* printf("The loop of i=low.., rp[%D]=%D\n",i,rp[i]); */ 851 if (rp[i] > bcol) break; 852 if (rp[i] == bcol) { 853 bap = ap + bs2*i + bs*cidx + ridx; 854 if (is == ADD_VALUES) *bap += value; 855 else *bap = value; 856 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 857 if (brow == bcol && ridx < cidx){ 858 bap = ap + bs2*i + bs*ridx + cidx; 859 if (is == ADD_VALUES) *bap += value; 860 else *bap = value; 861 } 862 goto noinsert1; 863 } 864 } 865 866 if (nonew == 1) goto noinsert1; 867 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 868 if (nrow >= rmax) { 869 /* there is no extra room in row, therefore enlarge */ 870 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 871 MatScalar *new_a; 872 873 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 874 875 /* Malloc new storage space */ 876 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); 877 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 878 new_j = (PetscInt*)(new_a + bs2*new_nz); 879 new_i = new_j + new_nz; 880 881 /* copy over old data into new slots */ 882 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 883 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 884 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 885 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 886 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 887 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 888 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 889 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 890 /* free up old matrix storage */ 891 ierr = PetscFree(a->a);CHKERRQ(ierr); 892 if (!a->singlemalloc) { 893 ierr = PetscFree(a->i);CHKERRQ(ierr); 894 ierr = PetscFree(a->j);CHKERRQ(ierr); 895 } 896 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 897 a->singlemalloc = PETSC_TRUE; 898 899 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 900 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 901 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); 902 a->maxnz += bs2*CHUNKSIZE; 903 a->reallocs++; 904 a->nz++; 905 } 906 907 N = nrow++ - 1; 908 /* shift up all the later entries in this row */ 909 for (ii=N; ii>=i; ii--) { 910 rp[ii+1] = rp[ii]; 911 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 912 } 913 if (N>=i) { 914 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 915 } 916 rp[i] = bcol; 917 ap[bs2*i + bs*cidx + ridx] = value; 918 noinsert1:; 919 low = i; 920 /* } */ 921 } 922 } /* end of if .. if.. */ 923 } /* end of loop over added columns */ 924 ailen[brow] = nrow; 925 } /* end of loop over added rows */ 926 927 PetscFunctionReturn(0); 928 } 929 930 EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*); 931 EXTERN PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*); 932 EXTERN PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat,PetscInt,IS[],PetscInt); 933 EXTERN PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,PetscInt,MatReuse,Mat*); 934 EXTERN PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat*[]); 935 EXTERN PetscErrorCode MatScale_SeqSBAIJ(const PetscScalar*,Mat); 936 EXTERN PetscErrorCode MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 937 EXTERN PetscErrorCode MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 938 EXTERN PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat,Vec); 939 EXTERN PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 940 EXTERN PetscErrorCode MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 941 EXTERN PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat); 942 EXTERN PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat,Vec); 943 944 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 945 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 946 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 947 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 948 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 949 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 950 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 951 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 952 953 EXTERN PetscErrorCode MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs); 954 955 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 956 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 957 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 958 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 959 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 960 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 961 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 962 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 963 964 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,Mat*); 965 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,Mat*); 966 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,Mat*); 967 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,Mat*); 968 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,Mat*); 969 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,Mat*); 970 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,Mat*); 971 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,Mat*); 972 EXTERN PetscErrorCode MatGetInertia_SeqSBAIJ(Mat,PetscInt*,PetscInt*,PetscInt*); 973 974 EXTERN PetscErrorCode MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 975 EXTERN PetscErrorCode MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 976 EXTERN PetscErrorCode MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 977 EXTERN PetscErrorCode MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 978 EXTERN PetscErrorCode MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 979 EXTERN PetscErrorCode MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 980 EXTERN PetscErrorCode MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 981 EXTERN PetscErrorCode MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 982 983 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 984 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 985 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 986 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 987 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 988 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 989 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 990 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 991 992 #ifdef HAVE_MatICCFactor 993 /* modified from MatILUFactor_SeqSBAIJ, needs further work! */ 994 #undef __FUNCT__ 995 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 996 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 997 { 998 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 999 Mat outA; 1000 PetscErrorCode ierr; 1001 PetscTruth row_identity,col_identity; 1002 1003 PetscFunctionBegin; 1004 outA = inA; 1005 inA->factor = FACTOR_CHOLESKY; 1006 1007 if (!a->diag) { 1008 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 1009 } 1010 /* 1011 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1012 for ILU(0) factorization with natural ordering 1013 */ 1014 switch (a->bs) { 1015 case 1: 1016 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1017 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1018 inA->ops->solves = MatSolves_SeqSBAIJ_1; 1019 PetscLoginfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1020 case 2: 1021 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1022 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1023 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1024 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1025 break; 1026 case 3: 1027 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1028 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1029 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1030 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1031 break; 1032 case 4: 1033 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1034 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1035 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1036 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1037 break; 1038 case 5: 1039 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1040 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1041 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1042 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1043 break; 1044 case 6: 1045 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1046 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1047 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1048 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1049 break; 1050 case 7: 1051 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1052 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1053 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1054 PetscLogInfo(inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1055 break; 1056 default: 1057 a->row = row; 1058 a->icol = col; 1059 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1060 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1061 1062 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1063 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1064 PetscLogObjectParent(inA,a->icol); 1065 1066 if (!a->solve_work) { 1067 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1068 PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar)); 1069 } 1070 } 1071 1072 ierr = MatCholeskyFactorNumeric(inA,&outA);CHKERRQ(ierr); 1073 1074 PetscFunctionReturn(0); 1075 } 1076 #endif 1077 1078 #undef __FUNCT__ 1079 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1080 PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat A) 1081 { 1082 static PetscTruth called = PETSC_FALSE; 1083 MPI_Comm comm = A->comm; 1084 PetscErrorCode ierr; 1085 1086 PetscFunctionBegin; 1087 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1088 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1089 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1090 PetscFunctionReturn(0); 1091 } 1092 1093 EXTERN_C_BEGIN 1094 #undef __FUNCT__ 1095 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1096 PetscErrorCode MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices) 1097 { 1098 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1099 PetscInt i,nz,n; 1100 1101 PetscFunctionBegin; 1102 nz = baij->maxnz; 1103 n = mat->n; 1104 for (i=0; i<nz; i++) { 1105 baij->j[i] = indices[i]; 1106 } 1107 baij->nz = nz; 1108 for (i=0; i<n; i++) { 1109 baij->ilen[i] = baij->imax[i]; 1110 } 1111 1112 PetscFunctionReturn(0); 1113 } 1114 EXTERN_C_END 1115 1116 #undef __FUNCT__ 1117 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1118 /*@ 1119 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1120 in the matrix. 1121 1122 Input Parameters: 1123 + mat - the SeqSBAIJ matrix 1124 - indices - the column indices 1125 1126 Level: advanced 1127 1128 Notes: 1129 This can be called if you have precomputed the nonzero structure of the 1130 matrix and want to provide it to the matrix object to improve the performance 1131 of the MatSetValues() operation. 1132 1133 You MUST have set the correct numbers of nonzeros per row in the call to 1134 MatCreateSeqSBAIJ(). 1135 1136 MUST be called before any calls to MatSetValues() 1137 1138 .seealso: MatCreateSeqSBAIJ 1139 @*/ 1140 PetscErrorCode MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1141 { 1142 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1143 1144 PetscFunctionBegin; 1145 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1146 PetscValidPointer(indices,2); 1147 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1148 if (f) { 1149 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1150 } else { 1151 SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices"); 1152 } 1153 PetscFunctionReturn(0); 1154 } 1155 1156 #undef __FUNCT__ 1157 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1158 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A) 1159 { 1160 PetscErrorCode ierr; 1161 1162 PetscFunctionBegin; 1163 ierr = MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1164 PetscFunctionReturn(0); 1165 } 1166 1167 #undef __FUNCT__ 1168 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1169 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1170 { 1171 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1172 PetscFunctionBegin; 1173 *array = a->a; 1174 PetscFunctionReturn(0); 1175 } 1176 1177 #undef __FUNCT__ 1178 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1179 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1180 { 1181 PetscFunctionBegin; 1182 PetscFunctionReturn(0); 1183 } 1184 1185 #include "petscblaslapack.h" 1186 #undef __FUNCT__ 1187 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1188 PetscErrorCode MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1189 { 1190 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1191 PetscErrorCode ierr; 1192 PetscInt i,bs=Y->bs,bs2,j; 1193 PetscBLASInt bnz = (PetscBLASInt)x->nz,one = 1; 1194 1195 PetscFunctionBegin; 1196 if (str == SAME_NONZERO_PATTERN) { 1197 BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1198 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1199 if (y->xtoy && y->XtoY != X) { 1200 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1201 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1202 } 1203 if (!y->xtoy) { /* get xtoy */ 1204 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1205 y->XtoY = X; 1206 } 1207 bs2 = bs*bs; 1208 for (i=0; i<x->nz; i++) { 1209 j = 0; 1210 while (j < bs2){ 1211 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1212 j++; 1213 } 1214 } 1215 PetscLogInfo(0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)); 1216 } else { 1217 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1218 } 1219 PetscFunctionReturn(0); 1220 } 1221 1222 #undef __FUNCT__ 1223 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1224 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1225 { 1226 PetscFunctionBegin; 1227 *flg = PETSC_TRUE; 1228 PetscFunctionReturn(0); 1229 } 1230 1231 #undef __FUNCT__ 1232 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1233 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1234 { 1235 PetscFunctionBegin; 1236 *flg = PETSC_TRUE; 1237 PetscFunctionReturn(0); 1238 } 1239 1240 #undef __FUNCT__ 1241 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1242 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg) 1243 { 1244 PetscFunctionBegin; 1245 *flg = PETSC_FALSE; 1246 PetscFunctionReturn(0); 1247 } 1248 1249 /* -------------------------------------------------------------------*/ 1250 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1251 MatGetRow_SeqSBAIJ, 1252 MatRestoreRow_SeqSBAIJ, 1253 MatMult_SeqSBAIJ_N, 1254 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1255 MatMult_SeqSBAIJ_N, 1256 MatMultAdd_SeqSBAIJ_N, 1257 MatSolve_SeqSBAIJ_N, 1258 0, 1259 0, 1260 /*10*/ 0, 1261 0, 1262 MatCholeskyFactor_SeqSBAIJ, 1263 MatRelax_SeqSBAIJ, 1264 MatTranspose_SeqSBAIJ, 1265 /*15*/ MatGetInfo_SeqSBAIJ, 1266 MatEqual_SeqSBAIJ, 1267 MatGetDiagonal_SeqSBAIJ, 1268 MatDiagonalScale_SeqSBAIJ, 1269 MatNorm_SeqSBAIJ, 1270 /*20*/ 0, 1271 MatAssemblyEnd_SeqSBAIJ, 1272 0, 1273 MatSetOption_SeqSBAIJ, 1274 MatZeroEntries_SeqSBAIJ, 1275 /*25*/ MatZeroRows_SeqSBAIJ, 1276 0, 1277 0, 1278 MatCholeskyFactorSymbolic_SeqSBAIJ, 1279 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1280 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1281 0, 1282 MatICCFactorSymbolic_SeqSBAIJ, 1283 MatGetArray_SeqSBAIJ, 1284 MatRestoreArray_SeqSBAIJ, 1285 /*35*/ MatDuplicate_SeqSBAIJ, 1286 0, 1287 0, 1288 0, 1289 0, 1290 /*40*/ MatAXPY_SeqSBAIJ, 1291 MatGetSubMatrices_SeqSBAIJ, 1292 MatIncreaseOverlap_SeqSBAIJ, 1293 MatGetValues_SeqSBAIJ, 1294 0, 1295 /*45*/ MatPrintHelp_SeqSBAIJ, 1296 MatScale_SeqSBAIJ, 1297 0, 1298 0, 1299 0, 1300 /*50*/ 0, 1301 MatGetRowIJ_SeqSBAIJ, 1302 MatRestoreRowIJ_SeqSBAIJ, 1303 0, 1304 0, 1305 /*55*/ 0, 1306 0, 1307 0, 1308 0, 1309 MatSetValuesBlocked_SeqSBAIJ, 1310 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1311 0, 1312 0, 1313 MatGetPetscMaps_Petsc, 1314 0, 1315 /*65*/ 0, 1316 0, 1317 0, 1318 0, 1319 0, 1320 /*70*/ MatGetRowMax_SeqSBAIJ, 1321 0, 1322 0, 1323 0, 1324 0, 1325 /*75*/ 0, 1326 0, 1327 0, 1328 0, 1329 0, 1330 /*80*/ 0, 1331 0, 1332 0, 1333 #if !defined(PETSC_USE_COMPLEX) 1334 MatGetInertia_SeqSBAIJ, 1335 #else 1336 0, 1337 #endif 1338 MatLoad_SeqSBAIJ, 1339 /*85*/ MatIsSymmetric_SeqSBAIJ, 1340 MatIsHermitian_SeqSBAIJ, 1341 MatIsStructurallySymmetric_SeqSBAIJ, 1342 0, 1343 0, 1344 /*90*/ 0, 1345 0, 1346 0, 1347 0, 1348 0, 1349 /*95*/ 0, 1350 0, 1351 0, 1352 0}; 1353 1354 EXTERN_C_BEGIN 1355 #undef __FUNCT__ 1356 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1357 PetscErrorCode MatStoreValues_SeqSBAIJ(Mat mat) 1358 { 1359 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1360 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 1361 PetscErrorCode ierr; 1362 1363 PetscFunctionBegin; 1364 if (aij->nonew != 1) { 1365 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1366 } 1367 1368 /* allocate space for values if not already there */ 1369 if (!aij->saved_values) { 1370 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1371 } 1372 1373 /* copy values over */ 1374 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1375 PetscFunctionReturn(0); 1376 } 1377 EXTERN_C_END 1378 1379 EXTERN_C_BEGIN 1380 #undef __FUNCT__ 1381 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1382 PetscErrorCode MatRetrieveValues_SeqSBAIJ(Mat mat) 1383 { 1384 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1385 PetscErrorCode ierr; 1386 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 1387 1388 PetscFunctionBegin; 1389 if (aij->nonew != 1) { 1390 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1391 } 1392 if (!aij->saved_values) { 1393 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1394 } 1395 1396 /* copy values over */ 1397 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1398 PetscFunctionReturn(0); 1399 } 1400 EXTERN_C_END 1401 1402 EXTERN_C_BEGIN 1403 #undef __FUNCT__ 1404 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1405 PetscErrorCode MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 1406 { 1407 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1408 PetscErrorCode ierr; 1409 PetscInt i,len,mbs,bs2; 1410 PetscTruth flg; 1411 1412 PetscFunctionBegin; 1413 B->preallocated = PETSC_TRUE; 1414 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1415 mbs = B->m/bs; 1416 bs2 = bs*bs; 1417 1418 if (mbs*bs != B->m) { 1419 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1420 } 1421 1422 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1423 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 1424 if (nnz) { 1425 for (i=0; i<mbs; i++) { 1426 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 1427 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs); 1428 } 1429 } 1430 1431 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1432 if (!flg) { 1433 switch (bs) { 1434 case 1: 1435 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1436 B->ops->solve = MatSolve_SeqSBAIJ_1; 1437 B->ops->solves = MatSolves_SeqSBAIJ_1; 1438 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1439 B->ops->mult = MatMult_SeqSBAIJ_1; 1440 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1441 break; 1442 case 2: 1443 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1444 B->ops->solve = MatSolve_SeqSBAIJ_2; 1445 B->ops->solvetranspose = MatSolve_SeqSBAIJ_2; 1446 B->ops->mult = MatMult_SeqSBAIJ_2; 1447 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1448 break; 1449 case 3: 1450 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1451 B->ops->solve = MatSolve_SeqSBAIJ_3; 1452 B->ops->solvetranspose = MatSolve_SeqSBAIJ_3; 1453 B->ops->mult = MatMult_SeqSBAIJ_3; 1454 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1455 break; 1456 case 4: 1457 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1458 B->ops->solve = MatSolve_SeqSBAIJ_4; 1459 B->ops->solvetranspose = MatSolve_SeqSBAIJ_4; 1460 B->ops->mult = MatMult_SeqSBAIJ_4; 1461 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1462 break; 1463 case 5: 1464 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1465 B->ops->solve = MatSolve_SeqSBAIJ_5; 1466 B->ops->solvetranspose = MatSolve_SeqSBAIJ_5; 1467 B->ops->mult = MatMult_SeqSBAIJ_5; 1468 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1469 break; 1470 case 6: 1471 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1472 B->ops->solve = MatSolve_SeqSBAIJ_6; 1473 B->ops->solvetranspose = MatSolve_SeqSBAIJ_6; 1474 B->ops->mult = MatMult_SeqSBAIJ_6; 1475 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1476 break; 1477 case 7: 1478 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1479 B->ops->solve = MatSolve_SeqSBAIJ_7; 1480 B->ops->solvetranspose = MatSolve_SeqSBAIJ_7; 1481 B->ops->mult = MatMult_SeqSBAIJ_7; 1482 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1483 break; 1484 } 1485 } 1486 1487 b->mbs = mbs; 1488 b->nbs = mbs; 1489 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->imax);CHKERRQ(ierr); 1490 if (!nnz) { 1491 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1492 else if (nz <= 0) nz = 1; 1493 for (i=0; i<mbs; i++) { 1494 b->imax[i] = nz; 1495 } 1496 nz = nz*mbs; /* total nz */ 1497 } else { 1498 nz = 0; 1499 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1500 } 1501 /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1502 1503 /* allocate the matrix space */ 1504 len = nz*sizeof(PetscInt) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(PetscInt); 1505 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1506 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1507 b->j = (PetscInt*)(b->a + nz*bs2); 1508 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1509 b->i = b->j + nz; 1510 b->singlemalloc = PETSC_TRUE; 1511 1512 /* pointer to beginning of each row */ 1513 b->i[0] = 0; 1514 for (i=1; i<mbs+1; i++) { 1515 b->i[i] = b->i[i-1] + b->imax[i-1]; 1516 } 1517 1518 /* b->ilen will count nonzeros in each block row so far. */ 1519 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->ilen);CHKERRQ(ierr); 1520 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1521 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1522 1523 B->bs = bs; 1524 b->bs2 = bs2; 1525 b->nz = 0; 1526 b->maxnz = nz*bs2; 1527 1528 b->inew = 0; 1529 b->jnew = 0; 1530 b->anew = 0; 1531 b->a2anew = 0; 1532 b->permute = PETSC_FALSE; 1533 PetscFunctionReturn(0); 1534 } 1535 EXTERN_C_END 1536 1537 EXTERN_C_BEGIN 1538 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqAIJ(Mat,const MatType,Mat*); 1539 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqBAIJ(Mat,const MatType,Mat*); 1540 EXTERN_C_END 1541 1542 /*MC 1543 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1544 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1545 1546 Options Database Keys: 1547 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1548 1549 Level: beginner 1550 1551 .seealso: MatCreateSeqSBAIJ 1552 M*/ 1553 1554 EXTERN_C_BEGIN 1555 #undef __FUNCT__ 1556 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1557 PetscErrorCode MatCreate_SeqSBAIJ(Mat B) 1558 { 1559 Mat_SeqSBAIJ *b; 1560 PetscErrorCode ierr; 1561 PetscMPIInt size; 1562 1563 PetscFunctionBegin; 1564 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1565 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1566 B->m = B->M = PetscMax(B->m,B->M); 1567 B->n = B->N = PetscMax(B->n,B->N); 1568 1569 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1570 B->data = (void*)b; 1571 ierr = PetscMemzero(b,sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1572 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1573 B->ops->destroy = MatDestroy_SeqSBAIJ; 1574 B->ops->view = MatView_SeqSBAIJ; 1575 B->factor = 0; 1576 B->lupivotthreshold = 1.0; 1577 B->mapping = 0; 1578 b->row = 0; 1579 b->icol = 0; 1580 b->reallocs = 0; 1581 b->saved_values = 0; 1582 1583 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1584 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1585 1586 b->sorted = PETSC_FALSE; 1587 b->roworiented = PETSC_TRUE; 1588 b->nonew = 0; 1589 b->diag = 0; 1590 b->solve_work = 0; 1591 b->mult_work = 0; 1592 B->spptr = 0; 1593 b->keepzeroedrows = PETSC_FALSE; 1594 b->xtoy = 0; 1595 b->XtoY = 0; 1596 1597 b->inew = 0; 1598 b->jnew = 0; 1599 b->anew = 0; 1600 b->a2anew = 0; 1601 b->permute = PETSC_FALSE; 1602 1603 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1604 "MatStoreValues_SeqSBAIJ", 1605 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1606 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1607 "MatRetrieveValues_SeqSBAIJ", 1608 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1609 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1610 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1611 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1612 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C", 1613 "MatConvert_SeqSBAIJ_SeqAIJ", 1614 MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr); 1615 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C", 1616 "MatConvert_SeqSBAIJ_SeqBAIJ", 1617 MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr); 1618 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1619 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1620 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1621 1622 B->symmetric = PETSC_TRUE; 1623 B->structurally_symmetric = PETSC_TRUE; 1624 B->symmetric_set = PETSC_TRUE; 1625 B->structurally_symmetric_set = PETSC_TRUE; 1626 PetscFunctionReturn(0); 1627 } 1628 EXTERN_C_END 1629 1630 #undef __FUNCT__ 1631 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1632 /*@C 1633 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1634 compressed row) format. For good matrix assembly performance the 1635 user should preallocate the matrix storage by setting the parameter nz 1636 (or the array nnz). By setting these parameters accurately, performance 1637 during matrix assembly can be increased by more than a factor of 50. 1638 1639 Collective on Mat 1640 1641 Input Parameters: 1642 + A - the symmetric matrix 1643 . bs - size of block 1644 . nz - number of block nonzeros per block row (same for all rows) 1645 - nnz - array containing the number of block nonzeros in the upper triangular plus 1646 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1647 1648 Options Database Keys: 1649 . -mat_no_unroll - uses code that does not unroll the loops in the 1650 block calculations (much slower) 1651 . -mat_block_size - size of the blocks to use 1652 1653 Level: intermediate 1654 1655 Notes: 1656 Specify the preallocated storage with either nz or nnz (not both). 1657 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1658 allocation. For additional details, see the users manual chapter on 1659 matrices. 1660 1661 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1662 @*/ 1663 PetscErrorCode MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 1664 { 1665 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 1666 1667 PetscFunctionBegin; 1668 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1669 if (f) { 1670 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1671 } 1672 PetscFunctionReturn(0); 1673 } 1674 1675 #undef __FUNCT__ 1676 #define __FUNCT__ "MatCreateSeqSBAIJ" 1677 /*@C 1678 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1679 compressed row) format. For good matrix assembly performance the 1680 user should preallocate the matrix storage by setting the parameter nz 1681 (or the array nnz). By setting these parameters accurately, performance 1682 during matrix assembly can be increased by more than a factor of 50. 1683 1684 Collective on MPI_Comm 1685 1686 Input Parameters: 1687 + comm - MPI communicator, set to PETSC_COMM_SELF 1688 . bs - size of block 1689 . m - number of rows, or number of columns 1690 . nz - number of block nonzeros per block row (same for all rows) 1691 - nnz - array containing the number of block nonzeros in the upper triangular plus 1692 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1693 1694 Output Parameter: 1695 . A - the symmetric matrix 1696 1697 Options Database Keys: 1698 . -mat_no_unroll - uses code that does not unroll the loops in the 1699 block calculations (much slower) 1700 . -mat_block_size - size of the blocks to use 1701 1702 Level: intermediate 1703 1704 Notes: 1705 1706 Specify the preallocated storage with either nz or nnz (not both). 1707 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1708 allocation. For additional details, see the users manual chapter on 1709 matrices. 1710 1711 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1712 @*/ 1713 PetscErrorCode MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1714 { 1715 PetscErrorCode ierr; 1716 1717 PetscFunctionBegin; 1718 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1719 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1720 ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1721 PetscFunctionReturn(0); 1722 } 1723 1724 #undef __FUNCT__ 1725 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1726 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1727 { 1728 Mat C; 1729 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1730 PetscErrorCode ierr; 1731 PetscInt i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1732 1733 PetscFunctionBegin; 1734 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1735 1736 *B = 0; 1737 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1738 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1739 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1740 c = (Mat_SeqSBAIJ*)C->data; 1741 1742 C->preallocated = PETSC_TRUE; 1743 C->factor = A->factor; 1744 c->row = 0; 1745 c->icol = 0; 1746 c->saved_values = 0; 1747 c->keepzeroedrows = a->keepzeroedrows; 1748 C->assembled = PETSC_TRUE; 1749 1750 C->M = A->M; 1751 C->N = A->N; 1752 C->bs = A->bs; 1753 c->bs2 = a->bs2; 1754 c->mbs = a->mbs; 1755 c->nbs = a->nbs; 1756 1757 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr); 1758 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr); 1759 for (i=0; i<mbs; i++) { 1760 c->imax[i] = a->imax[i]; 1761 c->ilen[i] = a->ilen[i]; 1762 } 1763 1764 /* allocate the matrix space */ 1765 c->singlemalloc = PETSC_TRUE; 1766 len = (mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)); 1767 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1768 c->j = (PetscInt*)(c->a + nz*bs2); 1769 c->i = c->j + nz; 1770 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1771 if (mbs > 0) { 1772 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1773 if (cpvalues == MAT_COPY_VALUES) { 1774 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1775 } else { 1776 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1777 } 1778 } 1779 1780 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ)); 1781 c->sorted = a->sorted; 1782 c->roworiented = a->roworiented; 1783 c->nonew = a->nonew; 1784 1785 if (a->diag) { 1786 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 1787 PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt)); 1788 for (i=0; i<mbs; i++) { 1789 c->diag[i] = a->diag[i]; 1790 } 1791 } else c->diag = 0; 1792 c->nz = a->nz; 1793 c->maxnz = a->maxnz; 1794 c->solve_work = 0; 1795 c->mult_work = 0; 1796 *B = C; 1797 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1798 PetscFunctionReturn(0); 1799 } 1800 1801 #undef __FUNCT__ 1802 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1803 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer,const MatType type,Mat *A) 1804 { 1805 Mat_SeqSBAIJ *a; 1806 Mat B; 1807 PetscErrorCode ierr; 1808 int fd; 1809 PetscMPIInt size; 1810 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 1811 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1812 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 1813 PetscInt *masked,nmask,tmp,bs2,ishift; 1814 PetscScalar *aa; 1815 MPI_Comm comm = ((PetscObject)viewer)->comm; 1816 1817 PetscFunctionBegin; 1818 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1819 bs2 = bs*bs; 1820 1821 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1822 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1823 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1824 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1825 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1826 M = header[1]; N = header[2]; nz = header[3]; 1827 1828 if (header[3] < 0) { 1829 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1830 } 1831 1832 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1833 1834 /* 1835 This code adds extra rows to make sure the number of rows is 1836 divisible by the blocksize 1837 */ 1838 mbs = M/bs; 1839 extra_rows = bs - M + bs*(mbs); 1840 if (extra_rows == bs) extra_rows = 0; 1841 else mbs++; 1842 if (extra_rows) { 1843 PetscLogInfo(0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"); 1844 } 1845 1846 /* read in row lengths */ 1847 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1848 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1849 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1850 1851 /* read in column indices */ 1852 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1853 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1854 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1855 1856 /* loop over row lengths determining block row lengths */ 1857 ierr = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr); 1858 ierr = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1859 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 1860 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1861 masked = mask + mbs; 1862 rowcount = 0; nzcount = 0; 1863 for (i=0; i<mbs; i++) { 1864 nmask = 0; 1865 for (j=0; j<bs; j++) { 1866 kmax = rowlengths[rowcount]; 1867 for (k=0; k<kmax; k++) { 1868 tmp = jj[nzcount++]/bs; /* block col. index */ 1869 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1870 } 1871 rowcount++; 1872 } 1873 s_browlengths[i] += nmask; 1874 1875 /* zero out the mask elements we set */ 1876 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1877 } 1878 1879 /* create our matrix */ 1880 ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr); 1881 ierr = MatSetType(B,type);CHKERRQ(ierr); 1882 ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr); 1883 a = (Mat_SeqSBAIJ*)B->data; 1884 1885 /* set matrix "i" values */ 1886 a->i[0] = 0; 1887 for (i=1; i<= mbs; i++) { 1888 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1889 a->ilen[i-1] = s_browlengths[i-1]; 1890 } 1891 a->nz = a->i[mbs]; 1892 1893 /* read in nonzero values */ 1894 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1895 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1896 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1897 1898 /* set "a" and "j" values into matrix */ 1899 nzcount = 0; jcount = 0; 1900 for (i=0; i<mbs; i++) { 1901 nzcountb = nzcount; 1902 nmask = 0; 1903 for (j=0; j<bs; j++) { 1904 kmax = rowlengths[i*bs+j]; 1905 for (k=0; k<kmax; k++) { 1906 tmp = jj[nzcount++]/bs; /* block col. index */ 1907 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1908 } 1909 } 1910 /* sort the masked values */ 1911 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1912 1913 /* set "j" values into matrix */ 1914 maskcount = 1; 1915 for (j=0; j<nmask; j++) { 1916 a->j[jcount++] = masked[j]; 1917 mask[masked[j]] = maskcount++; 1918 } 1919 1920 /* set "a" values into matrix */ 1921 ishift = bs2*a->i[i]; 1922 for (j=0; j<bs; j++) { 1923 kmax = rowlengths[i*bs+j]; 1924 for (k=0; k<kmax; k++) { 1925 tmp = jj[nzcountb]/bs ; /* block col. index */ 1926 if (tmp >= i){ 1927 block = mask[tmp] - 1; 1928 point = jj[nzcountb] - bs*tmp; 1929 idx = ishift + bs2*block + j + bs*point; 1930 a->a[idx] = aa[nzcountb]; 1931 } 1932 nzcountb++; 1933 } 1934 } 1935 /* zero out the mask elements we set */ 1936 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1937 } 1938 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1939 1940 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1941 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1942 ierr = PetscFree(aa);CHKERRQ(ierr); 1943 ierr = PetscFree(jj);CHKERRQ(ierr); 1944 ierr = PetscFree(mask);CHKERRQ(ierr); 1945 1946 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1947 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1948 ierr = MatView_Private(B);CHKERRQ(ierr); 1949 *A = B; 1950 PetscFunctionReturn(0); 1951 } 1952 1953 #undef __FUNCT__ 1954 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1955 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1956 { 1957 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1958 MatScalar *aa=a->a,*v,*v1; 1959 PetscScalar *x,*b,*t,sum,d; 1960 PetscErrorCode ierr; 1961 PetscInt m=a->mbs,bs=A->bs,*ai=a->i,*aj=a->j; 1962 PetscInt nz,nz1,*vj,*vj1,i; 1963 1964 PetscFunctionBegin; 1965 its = its*lits; 1966 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1967 1968 if (bs > 1) 1969 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1970 1971 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1972 if (xx != bb) { 1973 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1974 } else { 1975 b = x; 1976 } 1977 1978 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1979 1980 if (flag & SOR_ZERO_INITIAL_GUESS) { 1981 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1982 for (i=0; i<m; i++) 1983 t[i] = b[i]; 1984 1985 for (i=0; i<m; i++){ 1986 d = *(aa + ai[i]); /* diag[i] */ 1987 v = aa + ai[i] + 1; 1988 vj = aj + ai[i] + 1; 1989 nz = ai[i+1] - ai[i] - 1; 1990 x[i] = omega*t[i]/d; 1991 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 1992 PetscLogFlops(2*nz-1); 1993 } 1994 } 1995 1996 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 1997 for (i=0; i<m; i++) 1998 t[i] = b[i]; 1999 2000 for (i=0; i<m-1; i++){ /* update rhs */ 2001 v = aa + ai[i] + 1; 2002 vj = aj + ai[i] + 1; 2003 nz = ai[i+1] - ai[i] - 1; 2004 while (nz--) t[*vj++] -= x[i]*(*v++); 2005 PetscLogFlops(2*nz-1); 2006 } 2007 for (i=m-1; i>=0; i--){ 2008 d = *(aa + ai[i]); 2009 v = aa + ai[i] + 1; 2010 vj = aj + ai[i] + 1; 2011 nz = ai[i+1] - ai[i] - 1; 2012 sum = t[i]; 2013 while (nz--) sum -= x[*vj++]*(*v++); 2014 PetscLogFlops(2*nz-1); 2015 x[i] = (1-omega)*x[i] + omega*sum/d; 2016 } 2017 } 2018 its--; 2019 } 2020 2021 while (its--) { 2022 /* 2023 forward sweep: 2024 for i=0,...,m-1: 2025 sum[i] = (b[i] - U(i,:)x )/d[i]; 2026 x[i] = (1-omega)x[i] + omega*sum[i]; 2027 b = b - x[i]*U^T(i,:); 2028 2029 */ 2030 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2031 for (i=0; i<m; i++) 2032 t[i] = b[i]; 2033 2034 for (i=0; i<m; i++){ 2035 d = *(aa + ai[i]); /* diag[i] */ 2036 v = aa + ai[i] + 1; v1=v; 2037 vj = aj + ai[i] + 1; vj1=vj; 2038 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2039 sum = t[i]; 2040 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2041 x[i] = (1-omega)*x[i] + omega*sum/d; 2042 while (nz--) t[*vj++] -= x[i]*(*v++); 2043 PetscLogFlops(4*nz-2); 2044 } 2045 } 2046 2047 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2048 /* 2049 backward sweep: 2050 b = b - x[i]*U^T(i,:), i=0,...,n-2 2051 for i=m-1,...,0: 2052 sum[i] = (b[i] - U(i,:)x )/d[i]; 2053 x[i] = (1-omega)x[i] + omega*sum[i]; 2054 */ 2055 for (i=0; i<m; i++) 2056 t[i] = b[i]; 2057 2058 for (i=0; i<m-1; i++){ /* update rhs */ 2059 v = aa + ai[i] + 1; 2060 vj = aj + ai[i] + 1; 2061 nz = ai[i+1] - ai[i] - 1; 2062 while (nz--) t[*vj++] -= x[i]*(*v++); 2063 PetscLogFlops(2*nz-1); 2064 } 2065 for (i=m-1; i>=0; i--){ 2066 d = *(aa + ai[i]); 2067 v = aa + ai[i] + 1; 2068 vj = aj + ai[i] + 1; 2069 nz = ai[i+1] - ai[i] - 1; 2070 sum = t[i]; 2071 while (nz--) sum -= x[*vj++]*(*v++); 2072 PetscLogFlops(2*nz-1); 2073 x[i] = (1-omega)*x[i] + omega*sum/d; 2074 } 2075 } 2076 } 2077 2078 ierr = PetscFree(t);CHKERRQ(ierr); 2079 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2080 if (bb != xx) { 2081 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2082 } 2083 2084 PetscFunctionReturn(0); 2085 } 2086 2087 2088 2089 2090 2091 2092