1 2 /* 3 Defines the basic matrix operations for the SBAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 7 #include "src/inline/spops.h" 8 #include "src/mat/impls/sbaij/seq/sbaij.h" 9 10 #define CHUNKSIZE 10 11 12 /* 13 Checks for missing diagonals 14 */ 15 #undef __FUNCT__ 16 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 17 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A) 18 { 19 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 20 PetscErrorCode ierr; 21 PetscInt *diag,*jj = a->j,i; 22 23 PetscFunctionBegin; 24 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 25 diag = a->diag; 26 for (i=0; i<a->mbs; i++) { 27 if (jj[diag[i]] != i) SETERRQ1(PETSC_ERR_ARG_CORRUPT,"Matrix is missing diagonal number %D",i); 28 } 29 PetscFunctionReturn(0); 30 } 31 32 #undef __FUNCT__ 33 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 34 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A) 35 { 36 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 37 PetscErrorCode ierr; 38 PetscInt i,mbs = a->mbs; 39 40 PetscFunctionBegin; 41 if (a->diag) PetscFunctionReturn(0); 42 43 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 44 ierr = PetscLogObjectMemory(A,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 45 for (i=0; i<mbs; i++) a->diag[i] = a->i[i]; 46 PetscFunctionReturn(0); 47 } 48 49 #undef __FUNCT__ 50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 51 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 52 { 53 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 54 PetscInt n = a->mbs,i; 55 56 PetscFunctionBegin; 57 *nn = n; 58 if (!ia) PetscFunctionReturn(0); 59 60 if (oshift == 1) { 61 /* temporarily add 1 to i and j indices */ 62 PetscInt nz = a->i[n]; 63 for (i=0; i<nz; i++) a->j[i]++; 64 for (i=0; i<n+1; i++) a->i[i]++; 65 *ia = a->i; *ja = a->j; 66 } else { 67 *ia = a->i; *ja = a->j; 68 } 69 PetscFunctionReturn(0); 70 } 71 72 #undef __FUNCT__ 73 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 74 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 75 { 76 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 77 PetscInt i,n = a->mbs; 78 79 PetscFunctionBegin; 80 if (!ia) PetscFunctionReturn(0); 81 82 if (oshift == 1) { 83 PetscInt nz = a->i[n]-1; 84 for (i=0; i<nz; i++) a->j[i]--; 85 for (i=0; i<n+1; i++) a->i[i]--; 86 } 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 92 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A) 93 { 94 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 95 PetscErrorCode ierr; 96 97 PetscFunctionBegin; 98 PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->m,a->nz); 99 ierr = PetscFree(a->a);CHKERRQ(ierr); 100 if (!a->singlemalloc) { 101 ierr = PetscFree(a->i);CHKERRQ(ierr); 102 ierr = PetscFree(a->j);CHKERRQ(ierr); 103 } 104 if (a->row) { 105 ierr = ISDestroy(a->row);CHKERRQ(ierr); 106 } 107 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 108 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 109 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 110 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 111 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 112 if (a->solves_work) {ierr = PetscFree(a->solves_work);CHKERRQ(ierr);} 113 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 114 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 115 if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);} 116 117 if (a->inew){ 118 ierr = PetscFree(a->inew);CHKERRQ(ierr); 119 a->inew = 0; 120 } 121 ierr = PetscFree(a);CHKERRQ(ierr); 122 123 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 124 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 125 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 126 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 127 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr); 128 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 129 PetscFunctionReturn(0); 130 } 131 132 #undef __FUNCT__ 133 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 134 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op) 135 { 136 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 137 PetscErrorCode ierr; 138 139 PetscFunctionBegin; 140 switch (op) { 141 case MAT_ROW_ORIENTED: 142 a->roworiented = PETSC_TRUE; 143 break; 144 case MAT_COLUMN_ORIENTED: 145 a->roworiented = PETSC_FALSE; 146 break; 147 case MAT_COLUMNS_SORTED: 148 a->sorted = PETSC_TRUE; 149 break; 150 case MAT_COLUMNS_UNSORTED: 151 a->sorted = PETSC_FALSE; 152 break; 153 case MAT_KEEP_ZEROED_ROWS: 154 a->keepzeroedrows = PETSC_TRUE; 155 break; 156 case MAT_NO_NEW_NONZERO_LOCATIONS: 157 a->nonew = 1; 158 break; 159 case MAT_NEW_NONZERO_LOCATION_ERR: 160 a->nonew = -1; 161 break; 162 case MAT_NEW_NONZERO_ALLOCATION_ERR: 163 a->nonew = -2; 164 break; 165 case MAT_YES_NEW_NONZERO_LOCATIONS: 166 a->nonew = 0; 167 break; 168 case MAT_ROWS_SORTED: 169 case MAT_ROWS_UNSORTED: 170 case MAT_YES_NEW_DIAGONALS: 171 case MAT_IGNORE_OFF_PROC_ENTRIES: 172 case MAT_USE_HASH_TABLE: 173 ierr = PetscLogInfo((A,"MatSetOption_SeqSBAIJ:Option ignored\n"));CHKERRQ(ierr); 174 break; 175 case MAT_NO_NEW_DIAGONALS: 176 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 177 case MAT_NOT_SYMMETRIC: 178 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 179 case MAT_HERMITIAN: 180 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 181 case MAT_SYMMETRIC: 182 case MAT_STRUCTURALLY_SYMMETRIC: 183 case MAT_NOT_HERMITIAN: 184 case MAT_SYMMETRY_ETERNAL: 185 case MAT_NOT_SYMMETRY_ETERNAL: 186 case MAT_IGNORE_LOWER_TRIANGULAR: 187 a->ignore_ltriangular = PETSC_TRUE; 188 break; 189 case MAT_ERROR_LOWER_TRIANGULAR: 190 a->ignore_ltriangular = PETSC_FALSE; 191 break; 192 default: 193 SETERRQ(PETSC_ERR_SUP,"unknown option"); 194 } 195 PetscFunctionReturn(0); 196 } 197 198 #undef __FUNCT__ 199 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 200 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v) 201 { 202 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 203 PetscErrorCode ierr; 204 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2; 205 MatScalar *aa,*aa_i; 206 PetscScalar *v_i; 207 208 PetscFunctionBegin; 209 bs = A->bs; 210 ai = a->i; 211 aj = a->j; 212 aa = a->a; 213 bs2 = a->bs2; 214 215 if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row); 216 217 bn = row/bs; /* Block number */ 218 bp = row % bs; /* Block position */ 219 M = ai[bn+1] - ai[bn]; 220 *ncols = bs*M; 221 222 if (v) { 223 *v = 0; 224 if (*ncols) { 225 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 226 for (i=0; i<M; i++) { /* for each block in the block row */ 227 v_i = *v + i*bs; 228 aa_i = aa + bs2*(ai[bn] + i); 229 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 230 } 231 } 232 } 233 234 if (cols) { 235 *cols = 0; 236 if (*ncols) { 237 ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr); 238 for (i=0; i<M; i++) { /* for each block in the block row */ 239 cols_i = *cols + i*bs; 240 itmp = bs*aj[ai[bn] + i]; 241 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 242 } 243 } 244 } 245 246 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 247 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 248 #ifdef column_search 249 v_i = *v + M*bs; 250 cols_i = *cols + M*bs; 251 for (i=0; i<bn; i++){ /* for each block row */ 252 M = ai[i+1] - ai[i]; 253 for (j=0; j<M; j++){ 254 itmp = aj[ai[i] + j]; /* block column value */ 255 if (itmp == bn){ 256 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 257 for (k=0; k<bs; k++) { 258 *cols_i++ = i*bs+k; 259 *v_i++ = aa_i[k]; 260 } 261 *ncols += bs; 262 break; 263 } 264 } 265 } 266 #endif 267 PetscFunctionReturn(0); 268 } 269 270 #undef __FUNCT__ 271 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 272 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 273 { 274 PetscErrorCode ierr; 275 276 PetscFunctionBegin; 277 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 278 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 279 PetscFunctionReturn(0); 280 } 281 282 #undef __FUNCT__ 283 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 284 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B) 285 { 286 PetscErrorCode ierr; 287 PetscFunctionBegin; 288 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 289 PetscFunctionReturn(0); 290 } 291 292 #undef __FUNCT__ 293 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 294 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 295 { 296 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 297 PetscErrorCode ierr; 298 PetscInt i,j,bs = A->bs,k,l,bs2=a->bs2; 299 char *name; 300 PetscViewerFormat format; 301 302 PetscFunctionBegin; 303 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 304 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 305 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 306 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 307 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 308 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 309 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 310 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 311 for (i=0; i<a->mbs; i++) { 312 for (j=0; j<bs; j++) { 313 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 314 for (k=a->i[i]; k<a->i[i+1]; k++) { 315 for (l=0; l<bs; l++) { 316 #if defined(PETSC_USE_COMPLEX) 317 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 318 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 319 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 320 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 321 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 322 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 323 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 324 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 325 } 326 #else 327 if (a->a[bs2*k + l*bs + j] != 0.0) { 328 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 329 } 330 #endif 331 } 332 } 333 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 334 } 335 } 336 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 337 } else { 338 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 339 for (i=0; i<a->mbs; i++) { 340 for (j=0; j<bs; j++) { 341 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 342 for (k=a->i[i]; k<a->i[i+1]; k++) { 343 for (l=0; l<bs; l++) { 344 #if defined(PETSC_USE_COMPLEX) 345 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 346 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 347 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 348 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 349 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 350 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 351 } else { 352 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 353 } 354 #else 355 ierr = PetscViewerASCIIPrintf(viewer," %D %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 356 #endif 357 } 358 } 359 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 360 } 361 } 362 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 363 } 364 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 365 PetscFunctionReturn(0); 366 } 367 368 #undef __FUNCT__ 369 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 370 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 371 { 372 Mat A = (Mat) Aa; 373 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 374 PetscErrorCode ierr; 375 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2; 376 PetscMPIInt rank; 377 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 378 MatScalar *aa; 379 MPI_Comm comm; 380 PetscViewer viewer; 381 382 PetscFunctionBegin; 383 /* 384 This is nasty. If this is called from an originally parallel matrix 385 then all processes call this,but only the first has the matrix so the 386 rest should return immediately. 387 */ 388 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 389 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 390 if (rank) PetscFunctionReturn(0); 391 392 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 393 394 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 395 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 396 397 /* loop over matrix elements drawing boxes */ 398 color = PETSC_DRAW_BLUE; 399 for (i=0,row=0; i<mbs; i++,row+=bs) { 400 for (j=a->i[i]; j<a->i[i+1]; j++) { 401 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 402 x_l = a->j[j]*bs; x_r = x_l + 1.0; 403 aa = a->a + j*bs2; 404 for (k=0; k<bs; k++) { 405 for (l=0; l<bs; l++) { 406 if (PetscRealPart(*aa++) >= 0.) continue; 407 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 408 } 409 } 410 } 411 } 412 color = PETSC_DRAW_CYAN; 413 for (i=0,row=0; i<mbs; i++,row+=bs) { 414 for (j=a->i[i]; j<a->i[i+1]; j++) { 415 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 416 x_l = a->j[j]*bs; x_r = x_l + 1.0; 417 aa = a->a + j*bs2; 418 for (k=0; k<bs; k++) { 419 for (l=0; l<bs; l++) { 420 if (PetscRealPart(*aa++) != 0.) continue; 421 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 422 } 423 } 424 } 425 } 426 427 color = PETSC_DRAW_RED; 428 for (i=0,row=0; i<mbs; i++,row+=bs) { 429 for (j=a->i[i]; j<a->i[i+1]; j++) { 430 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 431 x_l = a->j[j]*bs; x_r = x_l + 1.0; 432 aa = a->a + j*bs2; 433 for (k=0; k<bs; k++) { 434 for (l=0; l<bs; l++) { 435 if (PetscRealPart(*aa++) <= 0.) continue; 436 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 437 } 438 } 439 } 440 } 441 PetscFunctionReturn(0); 442 } 443 444 #undef __FUNCT__ 445 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 446 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 447 { 448 PetscErrorCode ierr; 449 PetscReal xl,yl,xr,yr,w,h; 450 PetscDraw draw; 451 PetscTruth isnull; 452 453 PetscFunctionBegin; 454 455 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 456 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 457 458 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 459 xr = A->m; yr = A->m; h = yr/10.0; w = xr/10.0; 460 xr += w; yr += h; xl = -w; yl = -h; 461 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 462 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 463 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 464 PetscFunctionReturn(0); 465 } 466 467 #undef __FUNCT__ 468 #define __FUNCT__ "MatView_SeqSBAIJ" 469 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 470 { 471 PetscErrorCode ierr; 472 PetscTruth iascii,isdraw; 473 474 PetscFunctionBegin; 475 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 476 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 477 if (iascii){ 478 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 479 } else if (isdraw) { 480 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 481 } else { 482 Mat B; 483 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 484 ierr = MatView(B,viewer);CHKERRQ(ierr); 485 ierr = MatDestroy(B);CHKERRQ(ierr); 486 } 487 PetscFunctionReturn(0); 488 } 489 490 491 #undef __FUNCT__ 492 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 493 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 494 { 495 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 496 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 497 PetscInt *ai = a->i,*ailen = a->ilen; 498 PetscInt brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2; 499 MatScalar *ap,*aa = a->a,zero = 0.0; 500 501 PetscFunctionBegin; 502 for (k=0; k<m; k++) { /* loop over rows */ 503 row = im[k]; brow = row/bs; 504 if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row); 505 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 506 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 507 nrow = ailen[brow]; 508 for (l=0; l<n; l++) { /* loop over columns */ 509 if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]); 510 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1); 511 col = in[l] ; 512 bcol = col/bs; 513 cidx = col%bs; 514 ridx = row%bs; 515 high = nrow; 516 low = 0; /* assume unsorted */ 517 while (high-low > 5) { 518 t = (low+high)/2; 519 if (rp[t] > bcol) high = t; 520 else low = t; 521 } 522 for (i=low; i<high; i++) { 523 if (rp[i] > bcol) break; 524 if (rp[i] == bcol) { 525 *v++ = ap[bs2*i+bs*cidx+ridx]; 526 goto finished; 527 } 528 } 529 *v++ = zero; 530 finished:; 531 } 532 } 533 PetscFunctionReturn(0); 534 } 535 536 537 #undef __FUNCT__ 538 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 539 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 540 { 541 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 542 PetscErrorCode ierr; 543 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 544 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 545 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval; 546 PetscTruth roworiented=a->roworiented; 547 const MatScalar *value = v; 548 MatScalar *ap,*aa = a->a,*bap; 549 550 PetscFunctionBegin; 551 if (roworiented) { 552 stepval = (n-1)*bs; 553 } else { 554 stepval = (m-1)*bs; 555 } 556 for (k=0; k<m; k++) { /* loop over added rows */ 557 row = im[k]; 558 if (row < 0) continue; 559 #if defined(PETSC_USE_DEBUG) 560 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 561 #endif 562 rp = aj + ai[row]; 563 ap = aa + bs2*ai[row]; 564 rmax = imax[row]; 565 nrow = ailen[row]; 566 low = 0; 567 high = nrow; 568 for (l=0; l<n; l++) { /* loop over added columns */ 569 if (in[l] < 0) continue; 570 col = in[l]; 571 #if defined(PETSC_USE_DEBUG) 572 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1); 573 #endif 574 if (col < row) continue; /* ignore lower triangular block */ 575 if (roworiented) { 576 value = v + k*(stepval+bs)*bs + l*bs; 577 } else { 578 value = v + l*(stepval+bs)*bs + k*bs; 579 } 580 if (col < lastcol) low = 0; else high = nrow; 581 lastcol = col; 582 while (high-low > 7) { 583 t = (low+high)/2; 584 if (rp[t] > col) high = t; 585 else low = t; 586 } 587 for (i=low; i<high; i++) { 588 if (rp[i] > col) break; 589 if (rp[i] == col) { 590 bap = ap + bs2*i; 591 if (roworiented) { 592 if (is == ADD_VALUES) { 593 for (ii=0; ii<bs; ii++,value+=stepval) { 594 for (jj=ii; jj<bs2; jj+=bs) { 595 bap[jj] += *value++; 596 } 597 } 598 } else { 599 for (ii=0; ii<bs; ii++,value+=stepval) { 600 for (jj=ii; jj<bs2; jj+=bs) { 601 bap[jj] = *value++; 602 } 603 } 604 } 605 } else { 606 if (is == ADD_VALUES) { 607 for (ii=0; ii<bs; ii++,value+=stepval) { 608 for (jj=0; jj<bs; jj++) { 609 *bap++ += *value++; 610 } 611 } 612 } else { 613 for (ii=0; ii<bs; ii++,value+=stepval) { 614 for (jj=0; jj<bs; jj++) { 615 *bap++ = *value++; 616 } 617 } 618 } 619 } 620 goto noinsert2; 621 } 622 } 623 if (nonew == 1) goto noinsert2; 624 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 625 if (nrow >= rmax) { 626 /* there is no extra room in row, therefore enlarge */ 627 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 628 MatScalar *new_a; 629 630 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 631 632 /* malloc new storage space */ 633 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); 634 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 635 new_j = (PetscInt*)(new_a + bs2*new_nz); 636 new_i = new_j + new_nz; 637 638 /* copy over old data into new slots */ 639 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 640 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 641 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 642 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 643 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 644 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 645 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 646 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 647 /* free up old matrix storage */ 648 ierr = PetscFree(a->a);CHKERRQ(ierr); 649 if (!a->singlemalloc) { 650 ierr = PetscFree(a->i);CHKERRQ(ierr); 651 ierr = PetscFree(a->j);CHKERRQ(ierr); 652 } 653 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 654 a->singlemalloc = PETSC_TRUE; 655 656 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 657 rmax = imax[row] = imax[row] + CHUNKSIZE; 658 ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); 659 a->maxnz += bs2*CHUNKSIZE; 660 a->reallocs++; 661 a->nz++; 662 } 663 N = nrow++ - 1; 664 /* shift up all the later entries in this row */ 665 for (ii=N; ii>=i; ii--) { 666 rp[ii+1] = rp[ii]; 667 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 668 } 669 if (N >= i) { 670 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 671 } 672 rp[i] = col; 673 bap = ap + bs2*i; 674 if (roworiented) { 675 for (ii=0; ii<bs; ii++,value+=stepval) { 676 for (jj=ii; jj<bs2; jj+=bs) { 677 bap[jj] = *value++; 678 } 679 } 680 } else { 681 for (ii=0; ii<bs; ii++,value+=stepval) { 682 for (jj=0; jj<bs; jj++) { 683 *bap++ = *value++; 684 } 685 } 686 } 687 noinsert2:; 688 low = i; 689 } 690 ailen[row] = nrow; 691 } 692 PetscFunctionReturn(0); 693 } 694 695 #undef __FUNCT__ 696 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 697 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 698 { 699 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 700 PetscErrorCode ierr; 701 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 702 PetscInt m = A->m,*ip,N,*ailen = a->ilen; 703 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 704 MatScalar *aa = a->a,*ap; 705 706 PetscFunctionBegin; 707 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 708 709 if (m) rmax = ailen[0]; 710 for (i=1; i<mbs; i++) { 711 /* move each row back by the amount of empty slots (fshift) before it*/ 712 fshift += imax[i-1] - ailen[i-1]; 713 rmax = PetscMax(rmax,ailen[i]); 714 if (fshift) { 715 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 716 N = ailen[i]; 717 for (j=0; j<N; j++) { 718 ip[j-fshift] = ip[j]; 719 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 720 } 721 } 722 ai[i] = ai[i-1] + ailen[i-1]; 723 } 724 if (mbs) { 725 fshift += imax[mbs-1] - ailen[mbs-1]; 726 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 727 } 728 /* reset ilen and imax for each row */ 729 for (i=0; i<mbs; i++) { 730 ailen[i] = imax[i] = ai[i+1] - ai[i]; 731 } 732 a->nz = ai[mbs]; 733 734 /* diagonals may have moved, reset it */ 735 if (a->diag) { 736 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 737 } 738 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqSBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n", 739 m,A->m,A->bs,fshift*bs2,a->nz*bs2));CHKERRQ(ierr); 740 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqSBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs));CHKERRQ(ierr); 741 ierr = PetscLogInfo((A,"MatAssemblyEnd_SeqSBAIJ:Most nonzeros blocks in any row is %D\n",rmax));CHKERRQ(ierr); 742 a->reallocs = 0; 743 A->info.nz_unneeded = (PetscReal)fshift*bs2; 744 PetscFunctionReturn(0); 745 } 746 747 /* 748 This function returns an array of flags which indicate the locations of contiguous 749 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 750 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 751 Assume: sizes should be long enough to hold all the values. 752 */ 753 #undef __FUNCT__ 754 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 755 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 756 { 757 PetscInt i,j,k,row; 758 PetscTruth flg; 759 760 PetscFunctionBegin; 761 for (i=0,j=0; i<n; j++) { 762 row = idx[i]; 763 if (row%bs!=0) { /* Not the begining of a block */ 764 sizes[j] = 1; 765 i++; 766 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 767 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 768 i++; 769 } else { /* Begining of the block, so check if the complete block exists */ 770 flg = PETSC_TRUE; 771 for (k=1; k<bs; k++) { 772 if (row+k != idx[i+k]) { /* break in the block */ 773 flg = PETSC_FALSE; 774 break; 775 } 776 } 777 if (flg) { /* No break in the bs */ 778 sizes[j] = bs; 779 i+= bs; 780 } else { 781 sizes[j] = 1; 782 i++; 783 } 784 } 785 } 786 *bs_max = j; 787 PetscFunctionReturn(0); 788 } 789 790 791 /* Only add/insert a(i,j) with i<=j (blocks). 792 Any a(i,j) with i>j input by user is ingored. 793 */ 794 795 #undef __FUNCT__ 796 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 797 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 798 { 799 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 800 PetscErrorCode ierr; 801 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 802 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 803 PetscInt *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol; 804 PetscInt ridx,cidx,bs2=a->bs2; 805 MatScalar *ap,value,*aa=a->a,*bap; 806 807 PetscFunctionBegin; 808 for (k=0; k<m; k++) { /* loop over added rows */ 809 row = im[k]; /* row number */ 810 brow = row/bs; /* block row number */ 811 if (row < 0) continue; 812 #if defined(PETSC_USE_DEBUG) 813 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1); 814 #endif 815 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 816 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 817 rmax = imax[brow]; /* maximum space allocated for this row */ 818 nrow = ailen[brow]; /* actual length of this row */ 819 low = 0; 820 821 for (l=0; l<n; l++) { /* loop over added columns */ 822 if (in[l] < 0) continue; 823 #if defined(PETSC_USE_DEBUG) 824 if (in[l] >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->m-1); 825 #endif 826 col = in[l]; 827 bcol = col/bs; /* block col number */ 828 829 if (brow > bcol) { 830 if (a->ignore_ltriangular){ 831 continue; /* ignore lower triangular values */ 832 } else { 833 SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR)"); 834 } 835 } 836 837 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 838 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 839 /* element value a(k,l) */ 840 if (roworiented) { 841 value = v[l + k*n]; 842 } else { 843 value = v[k + l*m]; 844 } 845 846 /* move pointer bap to a(k,l) quickly and add/insert value */ 847 if (col < lastcol) low = 0; high = nrow; 848 lastcol = col; 849 while (high-low > 7) { 850 t = (low+high)/2; 851 if (rp[t] > bcol) high = t; 852 else low = t; 853 } 854 for (i=low; i<high; i++) { 855 if (rp[i] > bcol) break; 856 if (rp[i] == bcol) { 857 bap = ap + bs2*i + bs*cidx + ridx; 858 if (is == ADD_VALUES) *bap += value; 859 else *bap = value; 860 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 861 if (brow == bcol && ridx < cidx){ 862 bap = ap + bs2*i + bs*ridx + cidx; 863 if (is == ADD_VALUES) *bap += value; 864 else *bap = value; 865 } 866 goto noinsert1; 867 } 868 } 869 870 if (nonew == 1) goto noinsert1; 871 else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 872 if (nrow >= rmax) { 873 /* there is no extra room in row, therefore enlarge */ 874 PetscInt new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 875 MatScalar *new_a; 876 877 if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 878 879 /* Malloc new storage space */ 880 len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); 881 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 882 new_j = (PetscInt*)(new_a + bs2*new_nz); 883 new_i = new_j + new_nz; 884 885 /* copy over old data into new slots */ 886 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 887 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 888 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); 889 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 890 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); 891 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 892 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 893 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 894 /* free up old matrix storage */ 895 ierr = PetscFree(a->a);CHKERRQ(ierr); 896 if (!a->singlemalloc) { 897 ierr = PetscFree(a->i);CHKERRQ(ierr); 898 ierr = PetscFree(a->j);CHKERRQ(ierr); 899 } 900 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 901 a->singlemalloc = PETSC_TRUE; 902 903 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 904 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 905 ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); 906 a->maxnz += bs2*CHUNKSIZE; 907 a->reallocs++; 908 a->nz++; 909 } 910 911 N = nrow++ - 1; 912 /* shift up all the later entries in this row */ 913 for (ii=N; ii>=i; ii--) { 914 rp[ii+1] = rp[ii]; 915 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 916 } 917 if (N>=i) { 918 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 919 } 920 rp[i] = bcol; 921 ap[bs2*i + bs*cidx + ridx] = value; 922 noinsert1:; 923 low = i; 924 } 925 } /* end of loop over added columns */ 926 ailen[brow] = nrow; 927 } /* end of loop over added rows */ 928 PetscFunctionReturn(0); 929 } 930 931 EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat,IS,MatFactorInfo*,Mat*); 932 EXTERN PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat,IS,MatFactorInfo*); 933 EXTERN PetscErrorCode MatIncreaseOverlap_SeqSBAIJ(Mat,PetscInt,IS[],PetscInt); 934 EXTERN PetscErrorCode MatGetSubMatrix_SeqSBAIJ(Mat,IS,IS,PetscInt,MatReuse,Mat*); 935 EXTERN PetscErrorCode MatGetSubMatrices_SeqSBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat*[]); 936 EXTERN PetscErrorCode MatScale_SeqSBAIJ(const PetscScalar*,Mat); 937 EXTERN PetscErrorCode MatNorm_SeqSBAIJ(Mat,NormType,PetscReal *); 938 EXTERN PetscErrorCode MatEqual_SeqSBAIJ(Mat,Mat,PetscTruth*); 939 EXTERN PetscErrorCode MatGetDiagonal_SeqSBAIJ(Mat,Vec); 940 EXTERN PetscErrorCode MatDiagonalScale_SeqSBAIJ(Mat,Vec,Vec); 941 EXTERN PetscErrorCode MatGetInfo_SeqSBAIJ(Mat,MatInfoType,MatInfo *); 942 EXTERN PetscErrorCode MatZeroEntries_SeqSBAIJ(Mat); 943 EXTERN PetscErrorCode MatGetRowMax_SeqSBAIJ(Mat,Vec); 944 945 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N(Mat,Vec,Vec); 946 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1(Mat,Vec,Vec); 947 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2(Mat,Vec,Vec); 948 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3(Mat,Vec,Vec); 949 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4(Mat,Vec,Vec); 950 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5(Mat,Vec,Vec); 951 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6(Mat,Vec,Vec); 952 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7(Mat,Vec,Vec); 953 954 EXTERN PetscErrorCode MatSolves_SeqSBAIJ_1(Mat,Vecs,Vecs); 955 956 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_1_NaturalOrdering(Mat,Vec,Vec); 957 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_2_NaturalOrdering(Mat,Vec,Vec); 958 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_3_NaturalOrdering(Mat,Vec,Vec); 959 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_4_NaturalOrdering(Mat,Vec,Vec); 960 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_5_NaturalOrdering(Mat,Vec,Vec); 961 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_6_NaturalOrdering(Mat,Vec,Vec); 962 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_7_NaturalOrdering(Mat,Vec,Vec); 963 EXTERN PetscErrorCode MatSolve_SeqSBAIJ_N_NaturalOrdering(Mat,Vec,Vec); 964 965 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat,MatFactorInfo*,Mat*); 966 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1(Mat,MatFactorInfo*,Mat*); 967 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat,MatFactorInfo*,Mat*); 968 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_3(Mat,MatFactorInfo*,Mat*); 969 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_4(Mat,MatFactorInfo*,Mat*); 970 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_5(Mat,MatFactorInfo*,Mat*); 971 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_6(Mat,MatFactorInfo*,Mat*); 972 EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_7(Mat,MatFactorInfo*,Mat*); 973 EXTERN PetscErrorCode MatGetInertia_SeqSBAIJ(Mat,PetscInt*,PetscInt*,PetscInt*); 974 975 EXTERN PetscErrorCode MatMult_SeqSBAIJ_1(Mat,Vec,Vec); 976 EXTERN PetscErrorCode MatMult_SeqSBAIJ_2(Mat,Vec,Vec); 977 EXTERN PetscErrorCode MatMult_SeqSBAIJ_3(Mat,Vec,Vec); 978 EXTERN PetscErrorCode MatMult_SeqSBAIJ_4(Mat,Vec,Vec); 979 EXTERN PetscErrorCode MatMult_SeqSBAIJ_5(Mat,Vec,Vec); 980 EXTERN PetscErrorCode MatMult_SeqSBAIJ_6(Mat,Vec,Vec); 981 EXTERN PetscErrorCode MatMult_SeqSBAIJ_7(Mat,Vec,Vec); 982 EXTERN PetscErrorCode MatMult_SeqSBAIJ_N(Mat,Vec,Vec); 983 984 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_1(Mat,Vec,Vec,Vec); 985 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_2(Mat,Vec,Vec,Vec); 986 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_3(Mat,Vec,Vec,Vec); 987 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_4(Mat,Vec,Vec,Vec); 988 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_5(Mat,Vec,Vec,Vec); 989 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_6(Mat,Vec,Vec,Vec); 990 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_7(Mat,Vec,Vec,Vec); 991 EXTERN PetscErrorCode MatMultAdd_SeqSBAIJ_N(Mat,Vec,Vec,Vec); 992 993 #ifdef HAVE_MatICCFactor 994 /* modified from MatILUFactor_SeqSBAIJ, needs further work! */ 995 #undef __FUNCT__ 996 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 997 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 998 { 999 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 1000 Mat outA; 1001 PetscErrorCode ierr; 1002 PetscTruth row_identity,col_identity; 1003 1004 PetscFunctionBegin; 1005 outA = inA; 1006 inA->factor = FACTOR_CHOLESKY; 1007 1008 if (!a->diag) { 1009 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 1010 } 1011 /* 1012 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1013 for ILU(0) factorization with natural ordering 1014 */ 1015 switch (a->bs) { 1016 case 1: 1017 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1018 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1019 inA->ops->solves = MatSolves_SeqSBAIJ_1; 1020 ierr = PetscLoginfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering solvetrans BS=1\n"));CHKERRQ(ierr); 1021 case 2: 1022 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 1023 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1024 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 1025 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=2\n"));CHKERRQ(ierr); 1026 break; 1027 case 3: 1028 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 1029 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1030 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 1031 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=3\n"));CHKERRQ(ierr); 1032 break; 1033 case 4: 1034 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 1035 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1036 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 1037 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=4\n"));CHKERRQ(ierr); 1038 break; 1039 case 5: 1040 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 1041 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1042 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 1043 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=5\n"));CHKERRQ(ierr); 1044 break; 1045 case 6: 1046 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 1047 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1048 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 1049 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=6\n"));CHKERRQ(ierr); 1050 break; 1051 case 7: 1052 inA->ops->lufactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 1053 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1054 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 1055 ierr = PetscLogInfo((inA,"MatICCFactor_SeqSBAIJ:Using special in-place natural ordering factor and solve BS=7\n"));CHKERRQ(ierr); 1056 break; 1057 default: 1058 a->row = row; 1059 a->icol = col; 1060 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1061 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1062 1063 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1064 ierr = ISInvertPermutation(col,PETSC_DECIDE, &(a->icol));CHKERRQ(ierr); 1065 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 1066 1067 if (!a->solve_work) { 1068 ierr = PetscMalloc((A->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1069 ierr = PetscLogObjectMemory(inA,(A->m+a->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1070 } 1071 } 1072 1073 ierr = MatCholeskyFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1074 PetscFunctionReturn(0); 1075 } 1076 #endif 1077 1078 #undef __FUNCT__ 1079 #define __FUNCT__ "MatPrintHelp_SeqSBAIJ" 1080 PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat A) 1081 { 1082 static PetscTruth called = PETSC_FALSE; 1083 MPI_Comm comm = A->comm; 1084 PetscErrorCode ierr; 1085 1086 PetscFunctionBegin; 1087 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1088 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQSBAIJ and MATMPISBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1089 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1090 ierr = (*PetscHelpPrintf)(comm," -mat_ignore_ltriangular: Ignore lower triangular values set by user\n");CHKERRQ(ierr); 1091 PetscFunctionReturn(0); 1092 } 1093 1094 EXTERN_C_BEGIN 1095 #undef __FUNCT__ 1096 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1097 PetscErrorCode MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices) 1098 { 1099 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1100 PetscInt i,nz,n; 1101 1102 PetscFunctionBegin; 1103 nz = baij->maxnz; 1104 n = mat->n; 1105 for (i=0; i<nz; i++) { 1106 baij->j[i] = indices[i]; 1107 } 1108 baij->nz = nz; 1109 for (i=0; i<n; i++) { 1110 baij->ilen[i] = baij->imax[i]; 1111 } 1112 PetscFunctionReturn(0); 1113 } 1114 EXTERN_C_END 1115 1116 #undef __FUNCT__ 1117 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1118 /*@ 1119 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1120 in the matrix. 1121 1122 Input Parameters: 1123 + mat - the SeqSBAIJ matrix 1124 - indices - the column indices 1125 1126 Level: advanced 1127 1128 Notes: 1129 This can be called if you have precomputed the nonzero structure of the 1130 matrix and want to provide it to the matrix object to improve the performance 1131 of the MatSetValues() operation. 1132 1133 You MUST have set the correct numbers of nonzeros per row in the call to 1134 MatCreateSeqSBAIJ(), and the columns indices MUST be sorted. 1135 1136 MUST be called before any calls to MatSetValues() 1137 1138 .seealso: MatCreateSeqSBAIJ 1139 @*/ 1140 PetscErrorCode MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1141 { 1142 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1143 1144 PetscFunctionBegin; 1145 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1146 PetscValidPointer(indices,2); 1147 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1148 if (f) { 1149 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1150 } else { 1151 SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices"); 1152 } 1153 PetscFunctionReturn(0); 1154 } 1155 1156 #undef __FUNCT__ 1157 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1158 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A) 1159 { 1160 PetscErrorCode ierr; 1161 1162 PetscFunctionBegin; 1163 ierr = MatSeqSBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1164 PetscFunctionReturn(0); 1165 } 1166 1167 #undef __FUNCT__ 1168 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1169 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1170 { 1171 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1172 PetscFunctionBegin; 1173 *array = a->a; 1174 PetscFunctionReturn(0); 1175 } 1176 1177 #undef __FUNCT__ 1178 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1179 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1180 { 1181 PetscFunctionBegin; 1182 PetscFunctionReturn(0); 1183 } 1184 1185 #include "petscblaslapack.h" 1186 #undef __FUNCT__ 1187 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1188 PetscErrorCode MatAXPY_SeqSBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str) 1189 { 1190 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1191 PetscErrorCode ierr; 1192 PetscInt i,bs=Y->bs,bs2,j; 1193 PetscBLASInt bnz = (PetscBLASInt)x->nz,one = 1; 1194 1195 PetscFunctionBegin; 1196 if (str == SAME_NONZERO_PATTERN) { 1197 BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one); 1198 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1199 if (y->xtoy && y->XtoY != X) { 1200 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1201 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1202 } 1203 if (!y->xtoy) { /* get xtoy */ 1204 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1205 y->XtoY = X; 1206 } 1207 bs2 = bs*bs; 1208 for (i=0; i<x->nz; i++) { 1209 j = 0; 1210 while (j < bs2){ 1211 y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]); 1212 j++; 1213 } 1214 } 1215 ierr = PetscLogInfo((0,"MatAXPY_SeqSBAIJ: ratio of nnz_s(X)/nnz_s(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr); 1216 } else { 1217 ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 1218 } 1219 PetscFunctionReturn(0); 1220 } 1221 1222 #undef __FUNCT__ 1223 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1224 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1225 { 1226 PetscFunctionBegin; 1227 *flg = PETSC_TRUE; 1228 PetscFunctionReturn(0); 1229 } 1230 1231 #undef __FUNCT__ 1232 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1233 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1234 { 1235 PetscFunctionBegin; 1236 *flg = PETSC_TRUE; 1237 PetscFunctionReturn(0); 1238 } 1239 1240 #undef __FUNCT__ 1241 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1242 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg) 1243 { 1244 PetscFunctionBegin; 1245 *flg = PETSC_FALSE; 1246 PetscFunctionReturn(0); 1247 } 1248 1249 /* -------------------------------------------------------------------*/ 1250 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1251 MatGetRow_SeqSBAIJ, 1252 MatRestoreRow_SeqSBAIJ, 1253 MatMult_SeqSBAIJ_N, 1254 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1255 MatMult_SeqSBAIJ_N, 1256 MatMultAdd_SeqSBAIJ_N, 1257 MatSolve_SeqSBAIJ_N, 1258 0, 1259 0, 1260 /*10*/ 0, 1261 0, 1262 MatCholeskyFactor_SeqSBAIJ, 1263 MatRelax_SeqSBAIJ, 1264 MatTranspose_SeqSBAIJ, 1265 /*15*/ MatGetInfo_SeqSBAIJ, 1266 MatEqual_SeqSBAIJ, 1267 MatGetDiagonal_SeqSBAIJ, 1268 MatDiagonalScale_SeqSBAIJ, 1269 MatNorm_SeqSBAIJ, 1270 /*20*/ 0, 1271 MatAssemblyEnd_SeqSBAIJ, 1272 0, 1273 MatSetOption_SeqSBAIJ, 1274 MatZeroEntries_SeqSBAIJ, 1275 /*25*/ 0, 1276 0, 1277 0, 1278 MatCholeskyFactorSymbolic_SeqSBAIJ, 1279 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1280 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1281 0, 1282 MatICCFactorSymbolic_SeqSBAIJ, 1283 MatGetArray_SeqSBAIJ, 1284 MatRestoreArray_SeqSBAIJ, 1285 /*35*/ MatDuplicate_SeqSBAIJ, 1286 0, 1287 0, 1288 0, 1289 0, 1290 /*40*/ MatAXPY_SeqSBAIJ, 1291 MatGetSubMatrices_SeqSBAIJ, 1292 MatIncreaseOverlap_SeqSBAIJ, 1293 MatGetValues_SeqSBAIJ, 1294 0, 1295 /*45*/ MatPrintHelp_SeqSBAIJ, 1296 MatScale_SeqSBAIJ, 1297 0, 1298 0, 1299 0, 1300 /*50*/ 0, 1301 MatGetRowIJ_SeqSBAIJ, 1302 MatRestoreRowIJ_SeqSBAIJ, 1303 0, 1304 0, 1305 /*55*/ 0, 1306 0, 1307 0, 1308 0, 1309 MatSetValuesBlocked_SeqSBAIJ, 1310 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1311 0, 1312 0, 1313 MatGetPetscMaps_Petsc, 1314 0, 1315 /*65*/ 0, 1316 0, 1317 0, 1318 0, 1319 0, 1320 /*70*/ MatGetRowMax_SeqSBAIJ, 1321 0, 1322 0, 1323 0, 1324 0, 1325 /*75*/ 0, 1326 0, 1327 0, 1328 0, 1329 0, 1330 /*80*/ 0, 1331 0, 1332 0, 1333 #if !defined(PETSC_USE_COMPLEX) 1334 MatGetInertia_SeqSBAIJ, 1335 #else 1336 0, 1337 #endif 1338 MatLoad_SeqSBAIJ, 1339 /*85*/ MatIsSymmetric_SeqSBAIJ, 1340 MatIsHermitian_SeqSBAIJ, 1341 MatIsStructurallySymmetric_SeqSBAIJ, 1342 0, 1343 0, 1344 /*90*/ 0, 1345 0, 1346 0, 1347 0, 1348 0, 1349 /*95*/ 0, 1350 0, 1351 0, 1352 0}; 1353 EXTERN_C_BEGIN 1354 #undef __FUNCT__ 1355 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1356 PetscErrorCode MatStoreValues_SeqSBAIJ(Mat mat) 1357 { 1358 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1359 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 1360 PetscErrorCode ierr; 1361 1362 PetscFunctionBegin; 1363 if (aij->nonew != 1) { 1364 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1365 } 1366 1367 /* allocate space for values if not already there */ 1368 if (!aij->saved_values) { 1369 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1370 } 1371 1372 /* copy values over */ 1373 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1374 PetscFunctionReturn(0); 1375 } 1376 EXTERN_C_END 1377 1378 EXTERN_C_BEGIN 1379 #undef __FUNCT__ 1380 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1381 PetscErrorCode MatRetrieveValues_SeqSBAIJ(Mat mat) 1382 { 1383 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1384 PetscErrorCode ierr; 1385 PetscInt nz = aij->i[mat->m]*mat->bs*aij->bs2; 1386 1387 PetscFunctionBegin; 1388 if (aij->nonew != 1) { 1389 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1390 } 1391 if (!aij->saved_values) { 1392 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1393 } 1394 1395 /* copy values over */ 1396 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1397 PetscFunctionReturn(0); 1398 } 1399 EXTERN_C_END 1400 1401 EXTERN_C_BEGIN 1402 #undef __FUNCT__ 1403 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1404 PetscErrorCode MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 1405 { 1406 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1407 PetscErrorCode ierr; 1408 PetscInt i,len,mbs,bs2; 1409 PetscTruth flg; 1410 1411 PetscFunctionBegin; 1412 B->preallocated = PETSC_TRUE; 1413 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1414 mbs = B->m/bs; 1415 bs2 = bs*bs; 1416 1417 if (mbs*bs != B->m) { 1418 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1419 } 1420 1421 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1422 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 1423 if (nnz) { 1424 for (i=0; i<mbs; i++) { 1425 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 1426 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs); 1427 } 1428 } 1429 1430 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1431 if (!flg) { 1432 switch (bs) { 1433 case 1: 1434 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1435 B->ops->solve = MatSolve_SeqSBAIJ_1; 1436 B->ops->solves = MatSolves_SeqSBAIJ_1; 1437 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1438 B->ops->mult = MatMult_SeqSBAIJ_1; 1439 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1440 break; 1441 case 2: 1442 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1443 B->ops->solve = MatSolve_SeqSBAIJ_2; 1444 B->ops->solvetranspose = MatSolve_SeqSBAIJ_2; 1445 B->ops->mult = MatMult_SeqSBAIJ_2; 1446 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1447 break; 1448 case 3: 1449 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1450 B->ops->solve = MatSolve_SeqSBAIJ_3; 1451 B->ops->solvetranspose = MatSolve_SeqSBAIJ_3; 1452 B->ops->mult = MatMult_SeqSBAIJ_3; 1453 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1454 break; 1455 case 4: 1456 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1457 B->ops->solve = MatSolve_SeqSBAIJ_4; 1458 B->ops->solvetranspose = MatSolve_SeqSBAIJ_4; 1459 B->ops->mult = MatMult_SeqSBAIJ_4; 1460 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1461 break; 1462 case 5: 1463 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1464 B->ops->solve = MatSolve_SeqSBAIJ_5; 1465 B->ops->solvetranspose = MatSolve_SeqSBAIJ_5; 1466 B->ops->mult = MatMult_SeqSBAIJ_5; 1467 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1468 break; 1469 case 6: 1470 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1471 B->ops->solve = MatSolve_SeqSBAIJ_6; 1472 B->ops->solvetranspose = MatSolve_SeqSBAIJ_6; 1473 B->ops->mult = MatMult_SeqSBAIJ_6; 1474 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1475 break; 1476 case 7: 1477 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1478 B->ops->solve = MatSolve_SeqSBAIJ_7; 1479 B->ops->solvetranspose = MatSolve_SeqSBAIJ_7; 1480 B->ops->mult = MatMult_SeqSBAIJ_7; 1481 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1482 break; 1483 } 1484 } 1485 1486 b->mbs = mbs; 1487 b->nbs = mbs; 1488 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->imax);CHKERRQ(ierr); 1489 if (!nnz) { 1490 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1491 else if (nz <= 0) nz = 1; 1492 for (i=0; i<mbs; i++) { 1493 b->imax[i] = nz; 1494 } 1495 nz = nz*mbs; /* total nz */ 1496 } else { 1497 nz = 0; 1498 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1499 } 1500 /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1501 1502 /* allocate the matrix space */ 1503 len = nz*sizeof(PetscInt) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(PetscInt); 1504 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1505 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1506 b->j = (PetscInt*)(b->a + nz*bs2); 1507 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1508 b->i = b->j + nz; 1509 b->singlemalloc = PETSC_TRUE; 1510 1511 /* pointer to beginning of each row */ 1512 b->i[0] = 0; 1513 for (i=1; i<mbs+1; i++) { 1514 b->i[i] = b->i[i-1] + b->imax[i-1]; 1515 } 1516 1517 /* b->ilen will count nonzeros in each block row so far. */ 1518 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&b->ilen);CHKERRQ(ierr); 1519 ierr = PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1520 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1521 1522 B->bs = bs; 1523 b->bs2 = bs2; 1524 b->nz = 0; 1525 b->maxnz = nz*bs2; 1526 1527 b->inew = 0; 1528 b->jnew = 0; 1529 b->anew = 0; 1530 b->a2anew = 0; 1531 b->permute = PETSC_FALSE; 1532 PetscFunctionReturn(0); 1533 } 1534 EXTERN_C_END 1535 1536 EXTERN_C_BEGIN 1537 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*); 1538 EXTERN PetscErrorCode MatConvert_SeqSBAIJ_SeqBAIJ(Mat,const MatType,MatReuse,Mat*); 1539 EXTERN_C_END 1540 1541 /*MC 1542 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1543 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1544 1545 Options Database Keys: 1546 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1547 1548 Level: beginner 1549 1550 .seealso: MatCreateSeqSBAIJ 1551 M*/ 1552 1553 EXTERN_C_BEGIN 1554 #undef __FUNCT__ 1555 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1556 PetscErrorCode MatCreate_SeqSBAIJ(Mat B) 1557 { 1558 Mat_SeqSBAIJ *b; 1559 PetscErrorCode ierr; 1560 PetscMPIInt size; 1561 PetscTruth flg; 1562 1563 PetscFunctionBegin; 1564 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1565 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1566 B->m = B->M = PetscMax(B->m,B->M); 1567 B->n = B->N = PetscMax(B->n,B->N); 1568 1569 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1570 B->data = (void*)b; 1571 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1572 B->ops->destroy = MatDestroy_SeqSBAIJ; 1573 B->ops->view = MatView_SeqSBAIJ; 1574 B->factor = 0; 1575 B->mapping = 0; 1576 b->row = 0; 1577 b->icol = 0; 1578 b->reallocs = 0; 1579 b->saved_values = 0; 1580 1581 ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr); 1582 ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr); 1583 1584 b->sorted = PETSC_FALSE; 1585 b->roworiented = PETSC_TRUE; 1586 b->nonew = 0; 1587 b->diag = 0; 1588 b->solve_work = 0; 1589 b->mult_work = 0; 1590 B->spptr = 0; 1591 b->keepzeroedrows = PETSC_FALSE; 1592 b->xtoy = 0; 1593 b->XtoY = 0; 1594 1595 b->inew = 0; 1596 b->jnew = 0; 1597 b->anew = 0; 1598 b->a2anew = 0; 1599 b->permute = PETSC_FALSE; 1600 1601 b->ignore_ltriangular = PETSC_FALSE; 1602 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_ignore_lower_triangular",&flg);CHKERRQ(ierr); 1603 if (flg) b->ignore_ltriangular = PETSC_TRUE; 1604 1605 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1606 "MatStoreValues_SeqSBAIJ", 1607 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1608 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1609 "MatRetrieveValues_SeqSBAIJ", 1610 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1611 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1612 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1613 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1614 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C", 1615 "MatConvert_SeqSBAIJ_SeqAIJ", 1616 MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr); 1617 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C", 1618 "MatConvert_SeqSBAIJ_SeqBAIJ", 1619 MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr); 1620 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1621 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1622 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1623 1624 B->symmetric = PETSC_TRUE; 1625 B->structurally_symmetric = PETSC_TRUE; 1626 B->symmetric_set = PETSC_TRUE; 1627 B->structurally_symmetric_set = PETSC_TRUE; 1628 PetscFunctionReturn(0); 1629 } 1630 EXTERN_C_END 1631 1632 #undef __FUNCT__ 1633 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1634 /*@C 1635 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1636 compressed row) format. For good matrix assembly performance the 1637 user should preallocate the matrix storage by setting the parameter nz 1638 (or the array nnz). By setting these parameters accurately, performance 1639 during matrix assembly can be increased by more than a factor of 50. 1640 1641 Collective on Mat 1642 1643 Input Parameters: 1644 + A - the symmetric matrix 1645 . bs - size of block 1646 . nz - number of block nonzeros per block row (same for all rows) 1647 - nnz - array containing the number of block nonzeros in the upper triangular plus 1648 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1649 1650 Options Database Keys: 1651 . -mat_no_unroll - uses code that does not unroll the loops in the 1652 block calculations (much slower) 1653 . -mat_block_size - size of the blocks to use 1654 1655 Level: intermediate 1656 1657 Notes: 1658 Specify the preallocated storage with either nz or nnz (not both). 1659 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1660 allocation. For additional details, see the users manual chapter on 1661 matrices. 1662 1663 If the nnz parameter is given then the nz parameter is ignored 1664 1665 1666 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1667 @*/ 1668 PetscErrorCode MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 1669 { 1670 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 1671 1672 PetscFunctionBegin; 1673 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1674 if (f) { 1675 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1676 } 1677 PetscFunctionReturn(0); 1678 } 1679 1680 #undef __FUNCT__ 1681 #define __FUNCT__ "MatCreateSeqSBAIJ" 1682 /*@C 1683 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1684 compressed row) format. For good matrix assembly performance the 1685 user should preallocate the matrix storage by setting the parameter nz 1686 (or the array nnz). By setting these parameters accurately, performance 1687 during matrix assembly can be increased by more than a factor of 50. 1688 1689 Collective on MPI_Comm 1690 1691 Input Parameters: 1692 + comm - MPI communicator, set to PETSC_COMM_SELF 1693 . bs - size of block 1694 . m - number of rows, or number of columns 1695 . nz - number of block nonzeros per block row (same for all rows) 1696 - nnz - array containing the number of block nonzeros in the upper triangular plus 1697 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1698 1699 Output Parameter: 1700 . A - the symmetric matrix 1701 1702 Options Database Keys: 1703 . -mat_no_unroll - uses code that does not unroll the loops in the 1704 block calculations (much slower) 1705 . -mat_block_size - size of the blocks to use 1706 1707 Level: intermediate 1708 1709 Notes: 1710 1711 Specify the preallocated storage with either nz or nnz (not both). 1712 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1713 allocation. For additional details, see the users manual chapter on 1714 matrices. 1715 1716 If the nnz parameter is given then the nz parameter is ignored 1717 1718 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1719 @*/ 1720 PetscErrorCode MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1721 { 1722 PetscErrorCode ierr; 1723 1724 PetscFunctionBegin; 1725 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1726 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1727 ierr = MatSeqSBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1728 PetscFunctionReturn(0); 1729 } 1730 1731 #undef __FUNCT__ 1732 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1733 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1734 { 1735 Mat C; 1736 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1737 PetscErrorCode ierr; 1738 PetscInt i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1739 1740 PetscFunctionBegin; 1741 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1742 1743 *B = 0; 1744 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1745 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1746 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1747 c = (Mat_SeqSBAIJ*)C->data; 1748 1749 C->preallocated = PETSC_TRUE; 1750 C->factor = A->factor; 1751 c->row = 0; 1752 c->icol = 0; 1753 c->saved_values = 0; 1754 c->keepzeroedrows = a->keepzeroedrows; 1755 C->assembled = PETSC_TRUE; 1756 1757 C->M = A->M; 1758 C->N = A->N; 1759 C->bs = A->bs; 1760 c->bs2 = a->bs2; 1761 c->mbs = a->mbs; 1762 c->nbs = a->nbs; 1763 1764 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->imax);CHKERRQ(ierr); 1765 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->ilen);CHKERRQ(ierr); 1766 for (i=0; i<mbs; i++) { 1767 c->imax[i] = a->imax[i]; 1768 c->ilen[i] = a->ilen[i]; 1769 } 1770 1771 /* allocate the matrix space */ 1772 c->singlemalloc = PETSC_TRUE; 1773 len = (mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)); 1774 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1775 c->j = (PetscInt*)(c->a + nz*bs2); 1776 c->i = c->j + nz; 1777 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1778 if (mbs > 0) { 1779 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1780 if (cpvalues == MAT_COPY_VALUES) { 1781 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1782 } else { 1783 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1784 } 1785 } 1786 1787 ierr = PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_SeqSBAIJ));CHKERRQ(ierr); 1788 c->sorted = a->sorted; 1789 c->roworiented = a->roworiented; 1790 c->nonew = a->nonew; 1791 1792 if (a->diag) { 1793 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 1794 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1795 for (i=0; i<mbs; i++) { 1796 c->diag[i] = a->diag[i]; 1797 } 1798 } else c->diag = 0; 1799 c->nz = a->nz; 1800 c->maxnz = a->maxnz; 1801 c->solve_work = 0; 1802 c->mult_work = 0; 1803 *B = C; 1804 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1805 PetscFunctionReturn(0); 1806 } 1807 1808 #undef __FUNCT__ 1809 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1810 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer,const MatType type,Mat *A) 1811 { 1812 Mat_SeqSBAIJ *a; 1813 Mat B; 1814 PetscErrorCode ierr; 1815 int fd; 1816 PetscMPIInt size; 1817 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 1818 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1819 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 1820 PetscInt *masked,nmask,tmp,bs2,ishift; 1821 PetscScalar *aa; 1822 MPI_Comm comm = ((PetscObject)viewer)->comm; 1823 1824 PetscFunctionBegin; 1825 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1826 bs2 = bs*bs; 1827 1828 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1829 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1830 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1831 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1832 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1833 M = header[1]; N = header[2]; nz = header[3]; 1834 1835 if (header[3] < 0) { 1836 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1837 } 1838 1839 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1840 1841 /* 1842 This code adds extra rows to make sure the number of rows is 1843 divisible by the blocksize 1844 */ 1845 mbs = M/bs; 1846 extra_rows = bs - M + bs*(mbs); 1847 if (extra_rows == bs) extra_rows = 0; 1848 else mbs++; 1849 if (extra_rows) { 1850 ierr = PetscLogInfo((0,"MatLoad_SeqSBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr); 1851 } 1852 1853 /* read in row lengths */ 1854 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1855 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1856 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1857 1858 /* read in column indices */ 1859 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1860 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1861 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1862 1863 /* loop over row lengths determining block row lengths */ 1864 ierr = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr); 1865 ierr = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1866 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 1867 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1868 masked = mask + mbs; 1869 rowcount = 0; nzcount = 0; 1870 for (i=0; i<mbs; i++) { 1871 nmask = 0; 1872 for (j=0; j<bs; j++) { 1873 kmax = rowlengths[rowcount]; 1874 for (k=0; k<kmax; k++) { 1875 tmp = jj[nzcount++]/bs; /* block col. index */ 1876 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1877 } 1878 rowcount++; 1879 } 1880 s_browlengths[i] += nmask; 1881 1882 /* zero out the mask elements we set */ 1883 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1884 } 1885 1886 /* create our matrix */ 1887 ierr = MatCreate(comm,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows,&B);CHKERRQ(ierr); 1888 ierr = MatSetType(B,type);CHKERRQ(ierr); 1889 ierr = MatSeqSBAIJSetPreallocation(B,bs,0,s_browlengths);CHKERRQ(ierr); 1890 a = (Mat_SeqSBAIJ*)B->data; 1891 1892 /* set matrix "i" values */ 1893 a->i[0] = 0; 1894 for (i=1; i<= mbs; i++) { 1895 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1896 a->ilen[i-1] = s_browlengths[i-1]; 1897 } 1898 a->nz = a->i[mbs]; 1899 1900 /* read in nonzero values */ 1901 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1902 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1903 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1904 1905 /* set "a" and "j" values into matrix */ 1906 nzcount = 0; jcount = 0; 1907 for (i=0; i<mbs; i++) { 1908 nzcountb = nzcount; 1909 nmask = 0; 1910 for (j=0; j<bs; j++) { 1911 kmax = rowlengths[i*bs+j]; 1912 for (k=0; k<kmax; k++) { 1913 tmp = jj[nzcount++]/bs; /* block col. index */ 1914 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1915 } 1916 } 1917 /* sort the masked values */ 1918 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1919 1920 /* set "j" values into matrix */ 1921 maskcount = 1; 1922 for (j=0; j<nmask; j++) { 1923 a->j[jcount++] = masked[j]; 1924 mask[masked[j]] = maskcount++; 1925 } 1926 1927 /* set "a" values into matrix */ 1928 ishift = bs2*a->i[i]; 1929 for (j=0; j<bs; j++) { 1930 kmax = rowlengths[i*bs+j]; 1931 for (k=0; k<kmax; k++) { 1932 tmp = jj[nzcountb]/bs ; /* block col. index */ 1933 if (tmp >= i){ 1934 block = mask[tmp] - 1; 1935 point = jj[nzcountb] - bs*tmp; 1936 idx = ishift + bs2*block + j + bs*point; 1937 a->a[idx] = aa[nzcountb]; 1938 } 1939 nzcountb++; 1940 } 1941 } 1942 /* zero out the mask elements we set */ 1943 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1944 } 1945 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1946 1947 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1948 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1949 ierr = PetscFree(aa);CHKERRQ(ierr); 1950 ierr = PetscFree(jj);CHKERRQ(ierr); 1951 ierr = PetscFree(mask);CHKERRQ(ierr); 1952 1953 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1954 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1955 ierr = MatView_Private(B);CHKERRQ(ierr); 1956 *A = B; 1957 PetscFunctionReturn(0); 1958 } 1959 1960 #undef __FUNCT__ 1961 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1962 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1963 { 1964 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1965 MatScalar *aa=a->a,*v,*v1; 1966 PetscScalar *x,*b,*t,sum,d; 1967 PetscErrorCode ierr; 1968 PetscInt m=a->mbs,bs=A->bs,*ai=a->i,*aj=a->j; 1969 PetscInt nz,nz1,*vj,*vj1,i; 1970 1971 PetscFunctionBegin; 1972 its = its*lits; 1973 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1974 1975 if (bs > 1) 1976 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1977 1978 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1979 if (xx != bb) { 1980 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1981 } else { 1982 b = x; 1983 } 1984 1985 ierr = PetscMalloc(m*sizeof(PetscScalar),&t);CHKERRQ(ierr); 1986 1987 if (flag & SOR_ZERO_INITIAL_GUESS) { 1988 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 1989 for (i=0; i<m; i++) 1990 t[i] = b[i]; 1991 1992 for (i=0; i<m; i++){ 1993 d = *(aa + ai[i]); /* diag[i] */ 1994 v = aa + ai[i] + 1; 1995 vj = aj + ai[i] + 1; 1996 nz = ai[i+1] - ai[i] - 1; 1997 x[i] = omega*t[i]/d; 1998 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 1999 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2000 } 2001 } 2002 2003 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2004 for (i=0; i<m; i++) 2005 t[i] = b[i]; 2006 2007 for (i=0; i<m-1; i++){ /* update rhs */ 2008 v = aa + ai[i] + 1; 2009 vj = aj + ai[i] + 1; 2010 nz = ai[i+1] - ai[i] - 1; 2011 while (nz--) t[*vj++] -= x[i]*(*v++); 2012 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2013 } 2014 for (i=m-1; i>=0; i--){ 2015 d = *(aa + ai[i]); 2016 v = aa + ai[i] + 1; 2017 vj = aj + ai[i] + 1; 2018 nz = ai[i+1] - ai[i] - 1; 2019 sum = t[i]; 2020 while (nz--) sum -= x[*vj++]*(*v++); 2021 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2022 x[i] = (1-omega)*x[i] + omega*sum/d; 2023 } 2024 } 2025 its--; 2026 } 2027 2028 while (its--) { 2029 /* 2030 forward sweep: 2031 for i=0,...,m-1: 2032 sum[i] = (b[i] - U(i,:)x )/d[i]; 2033 x[i] = (1-omega)x[i] + omega*sum[i]; 2034 b = b - x[i]*U^T(i,:); 2035 2036 */ 2037 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2038 for (i=0; i<m; i++) 2039 t[i] = b[i]; 2040 2041 for (i=0; i<m; i++){ 2042 d = *(aa + ai[i]); /* diag[i] */ 2043 v = aa + ai[i] + 1; v1=v; 2044 vj = aj + ai[i] + 1; vj1=vj; 2045 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2046 sum = t[i]; 2047 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2048 x[i] = (1-omega)*x[i] + omega*sum/d; 2049 while (nz--) t[*vj++] -= x[i]*(*v++); 2050 ierr = PetscLogFlops(4*nz-2);CHKERRQ(ierr); 2051 } 2052 } 2053 2054 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2055 /* 2056 backward sweep: 2057 b = b - x[i]*U^T(i,:), i=0,...,n-2 2058 for i=m-1,...,0: 2059 sum[i] = (b[i] - U(i,:)x )/d[i]; 2060 x[i] = (1-omega)x[i] + omega*sum[i]; 2061 */ 2062 for (i=0; i<m; i++) 2063 t[i] = b[i]; 2064 2065 for (i=0; i<m-1; i++){ /* update rhs */ 2066 v = aa + ai[i] + 1; 2067 vj = aj + ai[i] + 1; 2068 nz = ai[i+1] - ai[i] - 1; 2069 while (nz--) t[*vj++] -= x[i]*(*v++); 2070 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2071 } 2072 for (i=m-1; i>=0; i--){ 2073 d = *(aa + ai[i]); 2074 v = aa + ai[i] + 1; 2075 vj = aj + ai[i] + 1; 2076 nz = ai[i+1] - ai[i] - 1; 2077 sum = t[i]; 2078 while (nz--) sum -= x[*vj++]*(*v++); 2079 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2080 x[i] = (1-omega)*x[i] + omega*sum/d; 2081 } 2082 } 2083 } 2084 2085 ierr = PetscFree(t);CHKERRQ(ierr); 2086 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2087 if (bb != xx) { 2088 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2089 } 2090 PetscFunctionReturn(0); 2091 } 2092 2093 2094 2095 2096 2097 2098