1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the SBAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/inline/spops.h" 9 #include "src/mat/impls/sbaij/seq/sbaij.h" 10 11 #define CHUNKSIZE 10 12 13 /* 14 Checks for missing diagonals 15 */ 16 #undef __FUNCT__ 17 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 18 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A,PetscTruth *missing,PetscInt *dd) 19 { 20 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 21 PetscErrorCode ierr; 22 PetscInt *diag,*jj = a->j,i; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 *missing = PETSC_FALSE; 28 for (i=0; i<a->mbs; i++) { 29 if (jj[diag[i]] != i) { 30 *missing = PETSC_TRUE; 31 if (dd) *dd = i; 32 break; 33 } 34 } 35 PetscFunctionReturn(0); 36 } 37 38 #undef __FUNCT__ 39 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 40 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A) 41 { 42 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 43 PetscErrorCode ierr; 44 PetscInt i; 45 46 PetscFunctionBegin; 47 if (!a->diag) { 48 ierr = PetscMalloc(a->mbs*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 49 } 50 for (i=0; i<a->mbs; i++) a->diag[i] = a->i[i]; 51 PetscFunctionReturn(0); 52 } 53 54 #undef __FUNCT__ 55 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 56 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 57 { 58 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 59 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs; 60 PetscErrorCode ierr; 61 62 PetscFunctionBegin; 63 *nn = n; 64 if (!ia) PetscFunctionReturn(0); 65 if (!blockcompressed) { 66 /* malloc & create the natural set of indices */ 67 ierr = PetscMalloc2((n+1)*bs,PetscInt,ia,nz*bs,PetscInt,ja);CHKERRQ(ierr); 68 for (i=0; i<n+1; i++) { 69 for (j=0; j<bs; j++) { 70 *ia[i*bs+j] = a->i[i]*bs+j+oshift; 71 } 72 } 73 for (i=0; i<nz; i++) { 74 for (j=0; j<bs; j++) { 75 *ja[i*bs+j] = a->j[i]*bs+j+oshift; 76 } 77 } 78 } else { /* blockcompressed */ 79 if (oshift == 1) { 80 /* temporarily add 1 to i and j indices */ 81 for (i=0; i<nz; i++) a->j[i]++; 82 for (i=0; i<n+1; i++) a->i[i]++; 83 } 84 *ia = a->i; *ja = a->j; 85 } 86 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 92 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 93 { 94 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 95 PetscInt i,n = a->mbs,nz = a->i[n]; 96 PetscErrorCode ierr; 97 98 PetscFunctionBegin; 99 if (!ia) PetscFunctionReturn(0); 100 101 if (!blockcompressed) { 102 ierr = PetscFree2(*ia,*ja);CHKERRQ(ierr); 103 } else if (oshift == 1) { /* blockcompressed */ 104 for (i=0; i<nz; i++) a->j[i]--; 105 for (i=0; i<n+1; i++) a->i[i]--; 106 } 107 108 PetscFunctionReturn(0); 109 } 110 111 #undef __FUNCT__ 112 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 113 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A) 114 { 115 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 116 PetscErrorCode ierr; 117 118 PetscFunctionBegin; 119 #if defined(PETSC_USE_LOG) 120 PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->rmap.N,a->nz); 121 #endif 122 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 123 if (a->row) {ierr = ISDestroy(a->row);CHKERRQ(ierr);} 124 if (a->col){ierr = ISDestroy(a->col);CHKERRQ(ierr);} 125 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 126 ierr = PetscFree(a->diag);CHKERRQ(ierr); 127 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 128 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 129 ierr = PetscFree(a->relax_work);CHKERRQ(ierr); 130 ierr = PetscFree(a->solves_work);CHKERRQ(ierr); 131 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 132 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 133 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 134 135 ierr = PetscFree(a->inew);CHKERRQ(ierr); 136 ierr = PetscFree(a);CHKERRQ(ierr); 137 138 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 139 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 140 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 141 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 142 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 143 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr); 144 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 145 PetscFunctionReturn(0); 146 } 147 148 #undef __FUNCT__ 149 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 150 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op,PetscTruth flg) 151 { 152 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 153 PetscErrorCode ierr; 154 155 PetscFunctionBegin; 156 switch (op) { 157 case MAT_ROW_ORIENTED: 158 a->roworiented = flg; 159 break; 160 case MAT_KEEP_ZEROED_ROWS: 161 a->keepzeroedrows = flg; 162 break; 163 case MAT_NEW_NONZERO_LOCATIONS: 164 a->nonew = (flg ? 0 : 1); 165 break; 166 case MAT_NEW_NONZERO_LOCATION_ERR: 167 a->nonew = (flg ? -1 : 0); 168 break; 169 case MAT_NEW_NONZERO_ALLOCATION_ERR: 170 a->nonew = (flg ? -2 : 0); 171 break; 172 case MAT_NEW_DIAGONALS: 173 case MAT_IGNORE_OFF_PROC_ENTRIES: 174 case MAT_USE_HASH_TABLE: 175 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 176 break; 177 case MAT_HERMITIAN: 178 if (flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 179 case MAT_SYMMETRIC: 180 case MAT_STRUCTURALLY_SYMMETRIC: 181 case MAT_SYMMETRY_ETERNAL: 182 if (!flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 183 ierr = PetscInfo1(A,"Option %s not relevent\n",MatOptions[op]);CHKERRQ(ierr); 184 break; 185 case MAT_IGNORE_LOWER_TRIANGULAR: 186 a->ignore_ltriangular = flg; 187 break; 188 case MAT_ERROR_LOWER_TRIANGULAR: 189 a->ignore_ltriangular = flg; 190 break; 191 case MAT_GETROW_UPPERTRIANGULAR: 192 a->getrow_utriangular = flg; 193 break; 194 default: 195 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 196 } 197 PetscFunctionReturn(0); 198 } 199 200 #undef __FUNCT__ 201 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 202 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v) 203 { 204 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 205 PetscErrorCode ierr; 206 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2; 207 MatScalar *aa,*aa_i; 208 PetscScalar *v_i; 209 210 PetscFunctionBegin; 211 if (A && !a->getrow_utriangular) SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format. Getting the upper triangular part of row, run with -mat_getrow_uppertriangular, call MatSetOption(mat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE) or MatGetRowUpperTriangular()"); 212 /* Get the upper triangular part of the row */ 213 bs = A->rmap.bs; 214 ai = a->i; 215 aj = a->j; 216 aa = a->a; 217 bs2 = a->bs2; 218 219 if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row); 220 221 bn = row/bs; /* Block number */ 222 bp = row % bs; /* Block position */ 223 M = ai[bn+1] - ai[bn]; 224 *ncols = bs*M; 225 226 if (v) { 227 *v = 0; 228 if (*ncols) { 229 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 230 for (i=0; i<M; i++) { /* for each block in the block row */ 231 v_i = *v + i*bs; 232 aa_i = aa + bs2*(ai[bn] + i); 233 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 234 } 235 } 236 } 237 238 if (cols) { 239 *cols = 0; 240 if (*ncols) { 241 ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr); 242 for (i=0; i<M; i++) { /* for each block in the block row */ 243 cols_i = *cols + i*bs; 244 itmp = bs*aj[ai[bn] + i]; 245 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 246 } 247 } 248 } 249 250 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 251 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 252 #ifdef column_search 253 v_i = *v + M*bs; 254 cols_i = *cols + M*bs; 255 for (i=0; i<bn; i++){ /* for each block row */ 256 M = ai[i+1] - ai[i]; 257 for (j=0; j<M; j++){ 258 itmp = aj[ai[i] + j]; /* block column value */ 259 if (itmp == bn){ 260 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 261 for (k=0; k<bs; k++) { 262 *cols_i++ = i*bs+k; 263 *v_i++ = aa_i[k]; 264 } 265 *ncols += bs; 266 break; 267 } 268 } 269 } 270 #endif 271 PetscFunctionReturn(0); 272 } 273 274 #undef __FUNCT__ 275 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 276 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 277 { 278 PetscErrorCode ierr; 279 280 PetscFunctionBegin; 281 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 282 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 283 PetscFunctionReturn(0); 284 } 285 286 #undef __FUNCT__ 287 #define __FUNCT__ "MatGetRowUpperTriangular_SeqSBAIJ" 288 PetscErrorCode MatGetRowUpperTriangular_SeqSBAIJ(Mat A) 289 { 290 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 291 292 PetscFunctionBegin; 293 a->getrow_utriangular = PETSC_TRUE; 294 PetscFunctionReturn(0); 295 } 296 #undef __FUNCT__ 297 #define __FUNCT__ "MatRestoreRowUpperTriangular_SeqSBAIJ" 298 PetscErrorCode MatRestoreRowUpperTriangular_SeqSBAIJ(Mat A) 299 { 300 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 301 302 PetscFunctionBegin; 303 a->getrow_utriangular = PETSC_FALSE; 304 PetscFunctionReturn(0); 305 } 306 307 #undef __FUNCT__ 308 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 309 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,MatReuse reuse,Mat *B) 310 { 311 PetscErrorCode ierr; 312 PetscFunctionBegin; 313 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 314 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 315 } 316 PetscFunctionReturn(0); 317 } 318 319 #undef __FUNCT__ 320 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 321 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 322 { 323 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 324 PetscErrorCode ierr; 325 PetscInt i,j,bs = A->rmap.bs,k,l,bs2=a->bs2; 326 const char *name; 327 PetscViewerFormat format; 328 329 PetscFunctionBegin; 330 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 331 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 332 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 333 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 334 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 335 Mat aij; 336 337 if (A->factor && bs>1){ 338 ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: matrix is factored with bs>1. MatView() with PETSC_VIEWER_ASCII_MATLAB is not supported and ignored!\n");CHKERRQ(ierr); 339 PetscFunctionReturn(0); 340 } 341 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 342 ierr = MatView(aij,viewer);CHKERRQ(ierr); 343 ierr = MatDestroy(aij);CHKERRQ(ierr); 344 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 345 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 346 for (i=0; i<a->mbs; i++) { 347 for (j=0; j<bs; j++) { 348 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 349 for (k=a->i[i]; k<a->i[i+1]; k++) { 350 for (l=0; l<bs; l++) { 351 #if defined(PETSC_USE_COMPLEX) 352 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 353 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 354 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 355 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 356 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 357 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 358 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 359 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 360 } 361 #else 362 if (a->a[bs2*k + l*bs + j] != 0.0) { 363 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 364 } 365 #endif 366 } 367 } 368 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 369 } 370 } 371 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 372 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 373 PetscFunctionReturn(0); 374 } else { 375 if (A->factor && bs>1){ 376 ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: matrix is factored. MatView_SeqSBAIJ_ASCII() may not display complete or logically correct entries!\n");CHKERRQ(ierr); 377 } 378 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 379 for (i=0; i<a->mbs; i++) { 380 for (j=0; j<bs; j++) { 381 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 382 for (k=a->i[i]; k<a->i[i+1]; k++) { 383 for (l=0; l<bs; l++) { 384 #if defined(PETSC_USE_COMPLEX) 385 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 386 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 387 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 388 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 389 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 390 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 391 } else { 392 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 393 } 394 #else 395 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 396 #endif 397 } 398 } 399 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 400 } 401 } 402 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 403 } 404 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 405 PetscFunctionReturn(0); 406 } 407 408 #undef __FUNCT__ 409 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 410 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 411 { 412 Mat A = (Mat) Aa; 413 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 414 PetscErrorCode ierr; 415 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2; 416 PetscMPIInt rank; 417 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 418 MatScalar *aa; 419 MPI_Comm comm; 420 PetscViewer viewer; 421 422 PetscFunctionBegin; 423 /* 424 This is nasty. If this is called from an originally parallel matrix 425 then all processes call this,but only the first has the matrix so the 426 rest should return immediately. 427 */ 428 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 429 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 430 if (rank) PetscFunctionReturn(0); 431 432 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 433 434 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 435 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 436 437 /* loop over matrix elements drawing boxes */ 438 color = PETSC_DRAW_BLUE; 439 for (i=0,row=0; i<mbs; i++,row+=bs) { 440 for (j=a->i[i]; j<a->i[i+1]; j++) { 441 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 442 x_l = a->j[j]*bs; x_r = x_l + 1.0; 443 aa = a->a + j*bs2; 444 for (k=0; k<bs; k++) { 445 for (l=0; l<bs; l++) { 446 if (PetscRealPart(*aa++) >= 0.) continue; 447 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 448 } 449 } 450 } 451 } 452 color = PETSC_DRAW_CYAN; 453 for (i=0,row=0; i<mbs; i++,row+=bs) { 454 for (j=a->i[i]; j<a->i[i+1]; j++) { 455 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 456 x_l = a->j[j]*bs; x_r = x_l + 1.0; 457 aa = a->a + j*bs2; 458 for (k=0; k<bs; k++) { 459 for (l=0; l<bs; l++) { 460 if (PetscRealPart(*aa++) != 0.) continue; 461 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 462 } 463 } 464 } 465 } 466 467 color = PETSC_DRAW_RED; 468 for (i=0,row=0; i<mbs; i++,row+=bs) { 469 for (j=a->i[i]; j<a->i[i+1]; j++) { 470 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 471 x_l = a->j[j]*bs; x_r = x_l + 1.0; 472 aa = a->a + j*bs2; 473 for (k=0; k<bs; k++) { 474 for (l=0; l<bs; l++) { 475 if (PetscRealPart(*aa++) <= 0.) continue; 476 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 477 } 478 } 479 } 480 } 481 PetscFunctionReturn(0); 482 } 483 484 #undef __FUNCT__ 485 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 486 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 487 { 488 PetscErrorCode ierr; 489 PetscReal xl,yl,xr,yr,w,h; 490 PetscDraw draw; 491 PetscTruth isnull; 492 493 PetscFunctionBegin; 494 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 495 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 496 497 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 498 xr = A->rmap.N; yr = A->rmap.N; h = yr/10.0; w = xr/10.0; 499 xr += w; yr += h; xl = -w; yl = -h; 500 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 501 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 502 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 503 PetscFunctionReturn(0); 504 } 505 506 #undef __FUNCT__ 507 #define __FUNCT__ "MatView_SeqSBAIJ" 508 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 509 { 510 PetscErrorCode ierr; 511 PetscTruth iascii,isdraw; 512 513 PetscFunctionBegin; 514 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 515 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 516 if (iascii){ 517 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 518 } else if (isdraw) { 519 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 520 } else { 521 Mat B; 522 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 523 ierr = MatView(B,viewer);CHKERRQ(ierr); 524 ierr = MatDestroy(B);CHKERRQ(ierr); 525 } 526 PetscFunctionReturn(0); 527 } 528 529 530 #undef __FUNCT__ 531 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 532 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 533 { 534 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 535 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 536 PetscInt *ai = a->i,*ailen = a->ilen; 537 PetscInt brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2; 538 MatScalar *ap,*aa = a->a; 539 540 PetscFunctionBegin; 541 for (k=0; k<m; k++) { /* loop over rows */ 542 row = im[k]; brow = row/bs; 543 if (row < 0) {v += n; continue;} /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row); */ 544 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 545 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 546 nrow = ailen[brow]; 547 for (l=0; l<n; l++) { /* loop over columns */ 548 if (in[l] < 0) {v++; continue;} /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]); */ 549 if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1); 550 col = in[l] ; 551 bcol = col/bs; 552 cidx = col%bs; 553 ridx = row%bs; 554 high = nrow; 555 low = 0; /* assume unsorted */ 556 while (high-low > 5) { 557 t = (low+high)/2; 558 if (rp[t] > bcol) high = t; 559 else low = t; 560 } 561 for (i=low; i<high; i++) { 562 if (rp[i] > bcol) break; 563 if (rp[i] == bcol) { 564 *v++ = ap[bs2*i+bs*cidx+ridx]; 565 goto finished; 566 } 567 } 568 *v++ = 0.0; 569 finished:; 570 } 571 } 572 PetscFunctionReturn(0); 573 } 574 575 576 #undef __FUNCT__ 577 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 578 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 579 { 580 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 581 PetscErrorCode ierr; 582 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 583 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 584 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval; 585 PetscTruth roworiented=a->roworiented; 586 const PetscScalar *value = v; 587 MatScalar *ap,*aa = a->a,*bap; 588 589 PetscFunctionBegin; 590 if (roworiented) { 591 stepval = (n-1)*bs; 592 } else { 593 stepval = (m-1)*bs; 594 } 595 for (k=0; k<m; k++) { /* loop over added rows */ 596 row = im[k]; 597 if (row < 0) continue; 598 #if defined(PETSC_USE_DEBUG) 599 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 600 #endif 601 rp = aj + ai[row]; 602 ap = aa + bs2*ai[row]; 603 rmax = imax[row]; 604 nrow = ailen[row]; 605 low = 0; 606 high = nrow; 607 for (l=0; l<n; l++) { /* loop over added columns */ 608 if (in[l] < 0) continue; 609 col = in[l]; 610 #if defined(PETSC_USE_DEBUG) 611 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1); 612 #endif 613 if (col < row) continue; /* ignore lower triangular block */ 614 if (roworiented) { 615 value = v + k*(stepval+bs)*bs + l*bs; 616 } else { 617 value = v + l*(stepval+bs)*bs + k*bs; 618 } 619 if (col <= lastcol) low = 0; else high = nrow; 620 lastcol = col; 621 while (high-low > 7) { 622 t = (low+high)/2; 623 if (rp[t] > col) high = t; 624 else low = t; 625 } 626 for (i=low; i<high; i++) { 627 if (rp[i] > col) break; 628 if (rp[i] == col) { 629 bap = ap + bs2*i; 630 if (roworiented) { 631 if (is == ADD_VALUES) { 632 for (ii=0; ii<bs; ii++,value+=stepval) { 633 for (jj=ii; jj<bs2; jj+=bs) { 634 bap[jj] += *value++; 635 } 636 } 637 } else { 638 for (ii=0; ii<bs; ii++,value+=stepval) { 639 for (jj=ii; jj<bs2; jj+=bs) { 640 bap[jj] = *value++; 641 } 642 } 643 } 644 } else { 645 if (is == ADD_VALUES) { 646 for (ii=0; ii<bs; ii++,value+=stepval) { 647 for (jj=0; jj<bs; jj++) { 648 *bap++ += *value++; 649 } 650 } 651 } else { 652 for (ii=0; ii<bs; ii++,value+=stepval) { 653 for (jj=0; jj<bs; jj++) { 654 *bap++ = *value++; 655 } 656 } 657 } 658 } 659 goto noinsert2; 660 } 661 } 662 if (nonew == 1) goto noinsert2; 663 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 664 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 665 N = nrow++ - 1; high++; 666 /* shift up all the later entries in this row */ 667 for (ii=N; ii>=i; ii--) { 668 rp[ii+1] = rp[ii]; 669 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 670 } 671 if (N >= i) { 672 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 673 } 674 rp[i] = col; 675 bap = ap + bs2*i; 676 if (roworiented) { 677 for (ii=0; ii<bs; ii++,value+=stepval) { 678 for (jj=ii; jj<bs2; jj+=bs) { 679 bap[jj] = *value++; 680 } 681 } 682 } else { 683 for (ii=0; ii<bs; ii++,value+=stepval) { 684 for (jj=0; jj<bs; jj++) { 685 *bap++ = *value++; 686 } 687 } 688 } 689 noinsert2:; 690 low = i; 691 } 692 ailen[row] = nrow; 693 } 694 PetscFunctionReturn(0); 695 } 696 697 #undef __FUNCT__ 698 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 699 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 700 { 701 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 702 PetscErrorCode ierr; 703 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 704 PetscInt m = A->rmap.N,*ip,N,*ailen = a->ilen; 705 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 706 MatScalar *aa = a->a,*ap; 707 708 PetscFunctionBegin; 709 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 710 711 if (m) rmax = ailen[0]; 712 for (i=1; i<mbs; i++) { 713 /* move each row back by the amount of empty slots (fshift) before it*/ 714 fshift += imax[i-1] - ailen[i-1]; 715 rmax = PetscMax(rmax,ailen[i]); 716 if (fshift) { 717 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 718 N = ailen[i]; 719 for (j=0; j<N; j++) { 720 ip[j-fshift] = ip[j]; 721 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 722 } 723 } 724 ai[i] = ai[i-1] + ailen[i-1]; 725 } 726 if (mbs) { 727 fshift += imax[mbs-1] - ailen[mbs-1]; 728 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 729 } 730 /* reset ilen and imax for each row */ 731 for (i=0; i<mbs; i++) { 732 ailen[i] = imax[i] = ai[i+1] - ai[i]; 733 } 734 a->nz = ai[mbs]; 735 736 /* diagonals may have moved, reset it */ 737 if (a->diag) { 738 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 739 } 740 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->rmap.N,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 741 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 742 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 743 a->reallocs = 0; 744 A->info.nz_unneeded = (PetscReal)fshift*bs2; 745 PetscFunctionReturn(0); 746 } 747 748 /* 749 This function returns an array of flags which indicate the locations of contiguous 750 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 751 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 752 Assume: sizes should be long enough to hold all the values. 753 */ 754 #undef __FUNCT__ 755 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 756 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 757 { 758 PetscInt i,j,k,row; 759 PetscTruth flg; 760 761 PetscFunctionBegin; 762 for (i=0,j=0; i<n; j++) { 763 row = idx[i]; 764 if (row%bs!=0) { /* Not the begining of a block */ 765 sizes[j] = 1; 766 i++; 767 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 768 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 769 i++; 770 } else { /* Begining of the block, so check if the complete block exists */ 771 flg = PETSC_TRUE; 772 for (k=1; k<bs; k++) { 773 if (row+k != idx[i+k]) { /* break in the block */ 774 flg = PETSC_FALSE; 775 break; 776 } 777 } 778 if (flg) { /* No break in the bs */ 779 sizes[j] = bs; 780 i+= bs; 781 } else { 782 sizes[j] = 1; 783 i++; 784 } 785 } 786 } 787 *bs_max = j; 788 PetscFunctionReturn(0); 789 } 790 791 792 /* Only add/insert a(i,j) with i<=j (blocks). 793 Any a(i,j) with i>j input by user is ingored. 794 */ 795 796 #undef __FUNCT__ 797 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 798 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 799 { 800 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 801 PetscErrorCode ierr; 802 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 803 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 804 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol; 805 PetscInt ridx,cidx,bs2=a->bs2; 806 MatScalar *ap,value,*aa=a->a,*bap; 807 808 PetscFunctionBegin; 809 for (k=0; k<m; k++) { /* loop over added rows */ 810 row = im[k]; /* row number */ 811 brow = row/bs; /* block row number */ 812 if (row < 0) continue; 813 #if defined(PETSC_USE_DEBUG) 814 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 815 #endif 816 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 817 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 818 rmax = imax[brow]; /* maximum space allocated for this row */ 819 nrow = ailen[brow]; /* actual length of this row */ 820 low = 0; 821 822 for (l=0; l<n; l++) { /* loop over added columns */ 823 if (in[l] < 0) continue; 824 #if defined(PETSC_USE_DEBUG) 825 if (in[l] >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->rmap.N-1); 826 #endif 827 col = in[l]; 828 bcol = col/bs; /* block col number */ 829 830 if (brow > bcol) { 831 if (a->ignore_ltriangular){ 832 continue; /* ignore lower triangular values */ 833 } else { 834 SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 835 } 836 } 837 838 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 839 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 840 /* element value a(k,l) */ 841 if (roworiented) { 842 value = v[l + k*n]; 843 } else { 844 value = v[k + l*m]; 845 } 846 847 /* move pointer bap to a(k,l) quickly and add/insert value */ 848 if (col <= lastcol) low = 0; high = nrow; 849 lastcol = col; 850 while (high-low > 7) { 851 t = (low+high)/2; 852 if (rp[t] > bcol) high = t; 853 else low = t; 854 } 855 for (i=low; i<high; i++) { 856 if (rp[i] > bcol) break; 857 if (rp[i] == bcol) { 858 bap = ap + bs2*i + bs*cidx + ridx; 859 if (is == ADD_VALUES) *bap += value; 860 else *bap = value; 861 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 862 if (brow == bcol && ridx < cidx){ 863 bap = ap + bs2*i + bs*ridx + cidx; 864 if (is == ADD_VALUES) *bap += value; 865 else *bap = value; 866 } 867 goto noinsert1; 868 } 869 } 870 871 if (nonew == 1) goto noinsert1; 872 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 873 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 874 875 N = nrow++ - 1; high++; 876 /* shift up all the later entries in this row */ 877 for (ii=N; ii>=i; ii--) { 878 rp[ii+1] = rp[ii]; 879 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 880 } 881 if (N>=i) { 882 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 883 } 884 rp[i] = bcol; 885 ap[bs2*i + bs*cidx + ridx] = value; 886 noinsert1:; 887 low = i; 888 } 889 } /* end of loop over added columns */ 890 ailen[brow] = nrow; 891 } /* end of loop over added rows */ 892 PetscFunctionReturn(0); 893 } 894 895 #undef __FUNCT__ 896 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 897 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 898 { 899 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 900 Mat outA; 901 PetscErrorCode ierr; 902 PetscTruth row_identity; 903 904 PetscFunctionBegin; 905 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels=0 is supported for in-place icc"); 906 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 907 if (!row_identity) SETERRQ(PETSC_ERR_SUP,"Matrix reordering is not supported"); 908 if (inA->rmap.bs != 1) SETERRQ1(PETSC_ERR_SUP,"Matrix block size %D is not supported",inA->rmap.bs); /* Need to replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR()! */ 909 910 outA = inA; 911 inA->factor = FACTOR_CHOLESKY; 912 913 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 914 /* 915 Blocksize < 8 have a special faster factorization/solver 916 for ICC(0) factorization with natural ordering 917 */ 918 switch (inA->rmap.bs){ /* Note: row_identity = PETSC_TRUE! */ 919 case 1: 920 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 921 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 922 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 923 inA->ops->solves = MatSolves_SeqSBAIJ_1; 924 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 925 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 926 ierr = PetscInfo(inA,"Using special in-place natural ordering solvetrans BS=1\n");CHKERRQ(ierr); 927 break; 928 case 2: 929 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 930 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 931 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 932 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering; 933 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering; 934 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=2\n");CHKERRQ(ierr); 935 break; 936 case 3: 937 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 938 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 939 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 940 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_3_NaturalOrdering; 941 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering; 942 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=3\n");CHKERRQ(ierr); 943 break; 944 case 4: 945 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 946 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 947 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 948 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_4_NaturalOrdering; 949 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering; 950 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=4\n");CHKERRQ(ierr); 951 break; 952 case 5: 953 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 954 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 955 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 956 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_5_NaturalOrdering; 957 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering; 958 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=5\n");CHKERRQ(ierr); 959 break; 960 case 6: 961 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 962 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 963 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 964 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_6_NaturalOrdering; 965 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering; 966 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=6\n");CHKERRQ(ierr); 967 break; 968 case 7: 969 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 970 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 971 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 972 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_7_NaturalOrdering; 973 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering; 974 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=7\n");CHKERRQ(ierr); 975 break; 976 default: 977 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 978 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering; 979 inA->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 980 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering; 981 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering; 982 break; 983 } 984 985 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 986 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 987 a->row = row; 988 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 989 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 990 a->col = row; 991 992 /* Create the invert permutation so that it can be used in MatCholeskyFactorNumeric() */ 993 if (a->icol) {ierr = ISInvertPermutation(row,PETSC_DECIDE, &a->icol);CHKERRQ(ierr);} 994 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 995 996 if (!a->solve_work) { 997 ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 998 ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr); 999 } 1000 1001 ierr = MatCholeskyFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1002 PetscFunctionReturn(0); 1003 } 1004 1005 EXTERN_C_BEGIN 1006 #undef __FUNCT__ 1007 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1008 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices) 1009 { 1010 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1011 PetscInt i,nz,n; 1012 1013 PetscFunctionBegin; 1014 nz = baij->maxnz; 1015 n = mat->cmap.n; 1016 for (i=0; i<nz; i++) { 1017 baij->j[i] = indices[i]; 1018 } 1019 baij->nz = nz; 1020 for (i=0; i<n; i++) { 1021 baij->ilen[i] = baij->imax[i]; 1022 } 1023 PetscFunctionReturn(0); 1024 } 1025 EXTERN_C_END 1026 1027 #undef __FUNCT__ 1028 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1029 /*@ 1030 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1031 in the matrix. 1032 1033 Input Parameters: 1034 + mat - the SeqSBAIJ matrix 1035 - indices - the column indices 1036 1037 Level: advanced 1038 1039 Notes: 1040 This can be called if you have precomputed the nonzero structure of the 1041 matrix and want to provide it to the matrix object to improve the performance 1042 of the MatSetValues() operation. 1043 1044 You MUST have set the correct numbers of nonzeros per row in the call to 1045 MatCreateSeqSBAIJ(), and the columns indices MUST be sorted. 1046 1047 MUST be called before any calls to MatSetValues() 1048 1049 .seealso: MatCreateSeqSBAIJ 1050 @*/ 1051 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1052 { 1053 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1054 1055 PetscFunctionBegin; 1056 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1057 PetscValidPointer(indices,2); 1058 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1059 if (f) { 1060 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1061 } else { 1062 SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices"); 1063 } 1064 PetscFunctionReturn(0); 1065 } 1066 1067 #undef __FUNCT__ 1068 #define __FUNCT__ "MatCopy_SeqSBAIJ" 1069 PetscErrorCode MatCopy_SeqSBAIJ(Mat A,Mat B,MatStructure str) 1070 { 1071 PetscErrorCode ierr; 1072 1073 PetscFunctionBegin; 1074 /* If the two matrices have the same copy implementation, use fast copy. */ 1075 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1076 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1077 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1078 1079 if (a->i[A->rmap.N] != b->i[B->rmap.N]) { 1080 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1081 } 1082 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr); 1083 } else { 1084 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1085 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1086 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1087 } 1088 PetscFunctionReturn(0); 1089 } 1090 1091 #undef __FUNCT__ 1092 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1093 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A) 1094 { 1095 PetscErrorCode ierr; 1096 1097 PetscFunctionBegin; 1098 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 1099 PetscFunctionReturn(0); 1100 } 1101 1102 #undef __FUNCT__ 1103 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1104 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1105 { 1106 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1107 PetscFunctionBegin; 1108 *array = a->a; 1109 PetscFunctionReturn(0); 1110 } 1111 1112 #undef __FUNCT__ 1113 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1114 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1115 { 1116 PetscFunctionBegin; 1117 PetscFunctionReturn(0); 1118 } 1119 1120 #include "petscblaslapack.h" 1121 #undef __FUNCT__ 1122 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1123 PetscErrorCode MatAXPY_SeqSBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1124 { 1125 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1126 PetscErrorCode ierr; 1127 PetscInt i,bs=Y->rmap.bs,bs2,j; 1128 PetscBLASInt one = 1,bnz = PetscBLASIntCast(x->nz); 1129 1130 PetscFunctionBegin; 1131 if (str == SAME_NONZERO_PATTERN) { 1132 PetscScalar alpha = a; 1133 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1134 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1135 if (y->xtoy && y->XtoY != X) { 1136 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1137 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1138 } 1139 if (!y->xtoy) { /* get xtoy */ 1140 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1141 y->XtoY = X; 1142 } 1143 bs2 = bs*bs; 1144 for (i=0; i<x->nz; i++) { 1145 j = 0; 1146 while (j < bs2){ 1147 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 1148 j++; 1149 } 1150 } 1151 ierr = PetscInfo3(Y,"ratio of nnz_s(X)/nnz_s(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 1152 } else { 1153 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1154 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1155 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1156 } 1157 PetscFunctionReturn(0); 1158 } 1159 1160 #undef __FUNCT__ 1161 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1162 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1163 { 1164 PetscFunctionBegin; 1165 *flg = PETSC_TRUE; 1166 PetscFunctionReturn(0); 1167 } 1168 1169 #undef __FUNCT__ 1170 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1171 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1172 { 1173 PetscFunctionBegin; 1174 *flg = PETSC_TRUE; 1175 PetscFunctionReturn(0); 1176 } 1177 1178 #undef __FUNCT__ 1179 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1180 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1181 { 1182 PetscFunctionBegin; 1183 *flg = PETSC_FALSE; 1184 PetscFunctionReturn(0); 1185 } 1186 1187 #undef __FUNCT__ 1188 #define __FUNCT__ "MatRealPart_SeqSBAIJ" 1189 PetscErrorCode MatRealPart_SeqSBAIJ(Mat A) 1190 { 1191 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1192 PetscInt i,nz = a->bs2*a->i[a->mbs]; 1193 MatScalar *aa = a->a; 1194 1195 PetscFunctionBegin; 1196 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 1197 PetscFunctionReturn(0); 1198 } 1199 1200 #undef __FUNCT__ 1201 #define __FUNCT__ "MatImaginaryPart_SeqSBAIJ" 1202 PetscErrorCode MatImaginaryPart_SeqSBAIJ(Mat A) 1203 { 1204 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1205 PetscInt i,nz = a->bs2*a->i[a->mbs]; 1206 MatScalar *aa = a->a; 1207 1208 PetscFunctionBegin; 1209 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 1210 PetscFunctionReturn(0); 1211 } 1212 1213 /* -------------------------------------------------------------------*/ 1214 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1215 MatGetRow_SeqSBAIJ, 1216 MatRestoreRow_SeqSBAIJ, 1217 MatMult_SeqSBAIJ_N, 1218 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1219 MatMult_SeqSBAIJ_N, /* transpose versions are same as non-transpose versions */ 1220 MatMultAdd_SeqSBAIJ_N, 1221 MatSolve_SeqSBAIJ_N, 1222 0, 1223 0, 1224 /*10*/ 0, 1225 0, 1226 MatCholeskyFactor_SeqSBAIJ, 1227 MatRelax_SeqSBAIJ, 1228 MatTranspose_SeqSBAIJ, 1229 /*15*/ MatGetInfo_SeqSBAIJ, 1230 MatEqual_SeqSBAIJ, 1231 MatGetDiagonal_SeqSBAIJ, 1232 MatDiagonalScale_SeqSBAIJ, 1233 MatNorm_SeqSBAIJ, 1234 /*20*/ 0, 1235 MatAssemblyEnd_SeqSBAIJ, 1236 0, 1237 MatSetOption_SeqSBAIJ, 1238 MatZeroEntries_SeqSBAIJ, 1239 /*25*/ 0, 1240 0, 1241 0, 1242 MatCholeskyFactorSymbolic_SeqSBAIJ, 1243 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1244 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1245 0, 1246 MatICCFactorSymbolic_SeqSBAIJ, 1247 MatGetArray_SeqSBAIJ, 1248 MatRestoreArray_SeqSBAIJ, 1249 /*35*/ MatDuplicate_SeqSBAIJ, 1250 MatForwardSolve_SeqSBAIJ_N, 1251 MatBackwardSolve_SeqSBAIJ_N, 1252 0, 1253 MatICCFactor_SeqSBAIJ, 1254 /*40*/ MatAXPY_SeqSBAIJ, 1255 MatGetSubMatrices_SeqSBAIJ, 1256 MatIncreaseOverlap_SeqSBAIJ, 1257 MatGetValues_SeqSBAIJ, 1258 MatCopy_SeqSBAIJ, 1259 /*45*/ 0, 1260 MatScale_SeqSBAIJ, 1261 0, 1262 0, 1263 0, 1264 /*50*/ 0, 1265 MatGetRowIJ_SeqSBAIJ, 1266 MatRestoreRowIJ_SeqSBAIJ, 1267 0, 1268 0, 1269 /*55*/ 0, 1270 0, 1271 0, 1272 0, 1273 MatSetValuesBlocked_SeqSBAIJ, 1274 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1275 0, 1276 0, 1277 0, 1278 0, 1279 /*65*/ 0, 1280 0, 1281 0, 1282 0, 1283 0, 1284 /*70*/ MatGetRowMaxAbs_SeqSBAIJ, 1285 0, 1286 0, 1287 0, 1288 0, 1289 /*75*/ 0, 1290 0, 1291 0, 1292 0, 1293 0, 1294 /*80*/ 0, 1295 0, 1296 0, 1297 #if !defined(PETSC_USE_COMPLEX) 1298 MatGetInertia_SeqSBAIJ, 1299 #else 1300 0, 1301 #endif 1302 MatLoad_SeqSBAIJ, 1303 /*85*/ MatIsSymmetric_SeqSBAIJ, 1304 MatIsHermitian_SeqSBAIJ, 1305 MatIsStructurallySymmetric_SeqSBAIJ, 1306 0, 1307 0, 1308 /*90*/ 0, 1309 0, 1310 0, 1311 0, 1312 0, 1313 /*95*/ 0, 1314 0, 1315 0, 1316 0, 1317 0, 1318 /*100*/0, 1319 0, 1320 0, 1321 0, 1322 0, 1323 /*105*/0, 1324 MatRealPart_SeqSBAIJ, 1325 MatImaginaryPart_SeqSBAIJ, 1326 MatGetRowUpperTriangular_SeqSBAIJ, 1327 MatRestoreRowUpperTriangular_SeqSBAIJ, 1328 /*110*/0, 1329 0, 1330 0, 1331 0, 1332 MatMissingDiagonal_SeqSBAIJ 1333 }; 1334 1335 EXTERN_C_BEGIN 1336 #undef __FUNCT__ 1337 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1338 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqSBAIJ(Mat mat) 1339 { 1340 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1341 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 1342 PetscErrorCode ierr; 1343 1344 PetscFunctionBegin; 1345 if (aij->nonew != 1) { 1346 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1347 } 1348 1349 /* allocate space for values if not already there */ 1350 if (!aij->saved_values) { 1351 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1352 } 1353 1354 /* copy values over */ 1355 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1356 PetscFunctionReturn(0); 1357 } 1358 EXTERN_C_END 1359 1360 EXTERN_C_BEGIN 1361 #undef __FUNCT__ 1362 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1363 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqSBAIJ(Mat mat) 1364 { 1365 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1366 PetscErrorCode ierr; 1367 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 1368 1369 PetscFunctionBegin; 1370 if (aij->nonew != 1) { 1371 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1372 } 1373 if (!aij->saved_values) { 1374 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1375 } 1376 1377 /* copy values over */ 1378 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1379 PetscFunctionReturn(0); 1380 } 1381 EXTERN_C_END 1382 1383 EXTERN_C_BEGIN 1384 #undef __FUNCT__ 1385 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1386 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 1387 { 1388 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1389 PetscErrorCode ierr; 1390 PetscInt i,mbs,bs2; 1391 PetscTruth skipallocation = PETSC_FALSE,flg; 1392 1393 PetscFunctionBegin; 1394 B->preallocated = PETSC_TRUE; 1395 ierr = PetscOptionsGetInt(((PetscObject)B)->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1396 B->rmap.bs = B->cmap.bs = bs; 1397 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 1398 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 1399 1400 mbs = B->rmap.N/bs; 1401 bs2 = bs*bs; 1402 1403 if (mbs*bs != B->rmap.N) { 1404 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1405 } 1406 1407 if (nz == MAT_SKIP_ALLOCATION) { 1408 skipallocation = PETSC_TRUE; 1409 nz = 0; 1410 } 1411 1412 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1413 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 1414 if (nnz) { 1415 for (i=0; i<mbs; i++) { 1416 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 1417 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs); 1418 } 1419 } 1420 1421 ierr = PetscOptionsHasName(((PetscObject)B)->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1422 if (!flg) { 1423 switch (bs) { 1424 case 1: 1425 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1426 B->ops->solve = MatSolve_SeqSBAIJ_1; 1427 B->ops->solves = MatSolves_SeqSBAIJ_1; 1428 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1429 B->ops->mult = MatMult_SeqSBAIJ_1; 1430 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1431 B->ops->multtranspose = MatMult_SeqSBAIJ_1; 1432 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_1; 1433 break; 1434 case 2: 1435 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1436 B->ops->solve = MatSolve_SeqSBAIJ_2; 1437 B->ops->solvetranspose = MatSolve_SeqSBAIJ_2; 1438 B->ops->mult = MatMult_SeqSBAIJ_2; 1439 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1440 B->ops->multtranspose = MatMult_SeqSBAIJ_2; 1441 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_2; 1442 break; 1443 case 3: 1444 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1445 B->ops->solve = MatSolve_SeqSBAIJ_3; 1446 B->ops->solvetranspose = MatSolve_SeqSBAIJ_3; 1447 B->ops->mult = MatMult_SeqSBAIJ_3; 1448 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1449 B->ops->multtranspose = MatMult_SeqSBAIJ_3; 1450 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_3; 1451 break; 1452 case 4: 1453 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1454 B->ops->solve = MatSolve_SeqSBAIJ_4; 1455 B->ops->solvetranspose = MatSolve_SeqSBAIJ_4; 1456 B->ops->mult = MatMult_SeqSBAIJ_4; 1457 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1458 B->ops->multtranspose = MatMult_SeqSBAIJ_4; 1459 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_4; 1460 break; 1461 case 5: 1462 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1463 B->ops->solve = MatSolve_SeqSBAIJ_5; 1464 B->ops->solvetranspose = MatSolve_SeqSBAIJ_5; 1465 B->ops->mult = MatMult_SeqSBAIJ_5; 1466 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1467 B->ops->multtranspose = MatMult_SeqSBAIJ_5; 1468 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_5; 1469 break; 1470 case 6: 1471 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1472 B->ops->solve = MatSolve_SeqSBAIJ_6; 1473 B->ops->solvetranspose = MatSolve_SeqSBAIJ_6; 1474 B->ops->mult = MatMult_SeqSBAIJ_6; 1475 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1476 B->ops->multtranspose = MatMult_SeqSBAIJ_6; 1477 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_6; 1478 break; 1479 case 7: 1480 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1481 B->ops->solve = MatSolve_SeqSBAIJ_7; 1482 B->ops->solvetranspose = MatSolve_SeqSBAIJ_7; 1483 B->ops->mult = MatMult_SeqSBAIJ_7; 1484 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1485 B->ops->multtranspose = MatMult_SeqSBAIJ_7; 1486 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_7; 1487 break; 1488 } 1489 } 1490 1491 b->mbs = mbs; 1492 b->nbs = mbs; 1493 if (!skipallocation) { 1494 if (!b->imax) { 1495 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 1496 ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 1497 } 1498 if (!nnz) { 1499 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1500 else if (nz <= 0) nz = 1; 1501 for (i=0; i<mbs; i++) { 1502 b->imax[i] = nz; 1503 } 1504 nz = nz*mbs; /* total nz */ 1505 } else { 1506 nz = 0; 1507 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1508 } 1509 /* b->ilen will count nonzeros in each block row so far. */ 1510 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1511 /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1512 1513 /* allocate the matrix space */ 1514 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 1515 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr); 1516 ierr = PetscLogObjectMemory(B,(B->rmap.N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 1517 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1518 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1519 b->singlemalloc = PETSC_TRUE; 1520 1521 /* pointer to beginning of each row */ 1522 b->i[0] = 0; 1523 for (i=1; i<mbs+1; i++) { 1524 b->i[i] = b->i[i-1] + b->imax[i-1]; 1525 } 1526 b->free_a = PETSC_TRUE; 1527 b->free_ij = PETSC_TRUE; 1528 } else { 1529 b->free_a = PETSC_FALSE; 1530 b->free_ij = PETSC_FALSE; 1531 } 1532 1533 B->rmap.bs = bs; 1534 b->bs2 = bs2; 1535 b->nz = 0; 1536 b->maxnz = nz*bs2; 1537 1538 b->inew = 0; 1539 b->jnew = 0; 1540 b->anew = 0; 1541 b->a2anew = 0; 1542 b->permute = PETSC_FALSE; 1543 PetscFunctionReturn(0); 1544 } 1545 EXTERN_C_END 1546 1547 EXTERN_C_BEGIN 1548 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 1549 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqBAIJ(Mat, MatType,MatReuse,Mat*); 1550 EXTERN_C_END 1551 1552 /*MC 1553 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1554 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1555 1556 Options Database Keys: 1557 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1558 1559 Level: beginner 1560 1561 .seealso: MatCreateSeqSBAIJ 1562 M*/ 1563 1564 EXTERN_C_BEGIN 1565 #undef __FUNCT__ 1566 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1567 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqSBAIJ(Mat B) 1568 { 1569 Mat_SeqSBAIJ *b; 1570 PetscErrorCode ierr; 1571 PetscMPIInt size; 1572 PetscTruth flg; 1573 1574 PetscFunctionBegin; 1575 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 1576 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1577 1578 ierr = PetscNewLog(B,Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1579 B->data = (void*)b; 1580 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1581 B->ops->destroy = MatDestroy_SeqSBAIJ; 1582 B->ops->view = MatView_SeqSBAIJ; 1583 B->factor = 0; 1584 B->mapping = 0; 1585 b->row = 0; 1586 b->icol = 0; 1587 b->reallocs = 0; 1588 b->saved_values = 0; 1589 1590 1591 b->roworiented = PETSC_TRUE; 1592 b->nonew = 0; 1593 b->diag = 0; 1594 b->solve_work = 0; 1595 b->mult_work = 0; 1596 B->spptr = 0; 1597 b->keepzeroedrows = PETSC_FALSE; 1598 b->xtoy = 0; 1599 b->XtoY = 0; 1600 1601 b->inew = 0; 1602 b->jnew = 0; 1603 b->anew = 0; 1604 b->a2anew = 0; 1605 b->permute = PETSC_FALSE; 1606 1607 b->ignore_ltriangular = PETSC_FALSE; 1608 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_ignore_lower_triangular",&flg);CHKERRQ(ierr); 1609 if (flg) b->ignore_ltriangular = PETSC_TRUE; 1610 1611 b->getrow_utriangular = PETSC_FALSE; 1612 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_getrow_uppertriangular",&flg);CHKERRQ(ierr); 1613 if (flg) b->getrow_utriangular = PETSC_TRUE; 1614 1615 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1616 "MatStoreValues_SeqSBAIJ", 1617 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1618 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1619 "MatRetrieveValues_SeqSBAIJ", 1620 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1621 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1622 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1623 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1624 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C", 1625 "MatConvert_SeqSBAIJ_SeqAIJ", 1626 MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr); 1627 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C", 1628 "MatConvert_SeqSBAIJ_SeqBAIJ", 1629 MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr); 1630 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1631 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1632 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1633 1634 B->symmetric = PETSC_TRUE; 1635 B->structurally_symmetric = PETSC_TRUE; 1636 B->symmetric_set = PETSC_TRUE; 1637 B->structurally_symmetric_set = PETSC_TRUE; 1638 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSBAIJ);CHKERRQ(ierr); 1639 PetscFunctionReturn(0); 1640 } 1641 EXTERN_C_END 1642 1643 #undef __FUNCT__ 1644 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1645 /*@C 1646 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1647 compressed row) format. For good matrix assembly performance the 1648 user should preallocate the matrix storage by setting the parameter nz 1649 (or the array nnz). By setting these parameters accurately, performance 1650 during matrix assembly can be increased by more than a factor of 50. 1651 1652 Collective on Mat 1653 1654 Input Parameters: 1655 + A - the symmetric matrix 1656 . bs - size of block 1657 . nz - number of block nonzeros per block row (same for all rows) 1658 - nnz - array containing the number of block nonzeros in the upper triangular plus 1659 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1660 1661 Options Database Keys: 1662 . -mat_no_unroll - uses code that does not unroll the loops in the 1663 block calculations (much slower) 1664 . -mat_block_size - size of the blocks to use 1665 1666 Level: intermediate 1667 1668 Notes: 1669 Specify the preallocated storage with either nz or nnz (not both). 1670 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1671 allocation. For additional details, see the users manual chapter on 1672 matrices. 1673 1674 You can call MatGetInfo() to get information on how effective the preallocation was; 1675 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 1676 You can also run with the option -info and look for messages with the string 1677 malloc in them to see if additional memory allocation was needed. 1678 1679 If the nnz parameter is given then the nz parameter is ignored 1680 1681 1682 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1683 @*/ 1684 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 1685 { 1686 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 1687 1688 PetscFunctionBegin; 1689 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1690 if (f) { 1691 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1692 } 1693 PetscFunctionReturn(0); 1694 } 1695 1696 #undef __FUNCT__ 1697 #define __FUNCT__ "MatCreateSeqSBAIJ" 1698 /*@C 1699 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1700 compressed row) format. For good matrix assembly performance the 1701 user should preallocate the matrix storage by setting the parameter nz 1702 (or the array nnz). By setting these parameters accurately, performance 1703 during matrix assembly can be increased by more than a factor of 50. 1704 1705 Collective on MPI_Comm 1706 1707 Input Parameters: 1708 + comm - MPI communicator, set to PETSC_COMM_SELF 1709 . bs - size of block 1710 . m - number of rows, or number of columns 1711 . nz - number of block nonzeros per block row (same for all rows) 1712 - nnz - array containing the number of block nonzeros in the upper triangular plus 1713 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1714 1715 Output Parameter: 1716 . A - the symmetric matrix 1717 1718 Options Database Keys: 1719 . -mat_no_unroll - uses code that does not unroll the loops in the 1720 block calculations (much slower) 1721 . -mat_block_size - size of the blocks to use 1722 1723 Level: intermediate 1724 1725 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 1726 MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely 1727 true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles. 1728 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 1729 1730 Notes: 1731 The number of rows and columns must be divisible by blocksize. 1732 This matrix type does not support complex Hermitian operation. 1733 1734 Specify the preallocated storage with either nz or nnz (not both). 1735 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1736 allocation. For additional details, see the users manual chapter on 1737 matrices. 1738 1739 If the nnz parameter is given then the nz parameter is ignored 1740 1741 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1742 @*/ 1743 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1744 { 1745 PetscErrorCode ierr; 1746 1747 PetscFunctionBegin; 1748 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1749 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 1750 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1751 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 1752 PetscFunctionReturn(0); 1753 } 1754 1755 #undef __FUNCT__ 1756 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1757 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1758 { 1759 Mat C; 1760 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1761 PetscErrorCode ierr; 1762 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1763 1764 PetscFunctionBegin; 1765 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1766 1767 *B = 0; 1768 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1769 ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr); 1770 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1771 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1772 c = (Mat_SeqSBAIJ*)C->data; 1773 1774 C->preallocated = PETSC_TRUE; 1775 C->factor = A->factor; 1776 c->row = 0; 1777 c->icol = 0; 1778 c->saved_values = 0; 1779 c->keepzeroedrows = a->keepzeroedrows; 1780 C->assembled = PETSC_TRUE; 1781 1782 ierr = PetscMapCopy(((PetscObject)A)->comm,&A->rmap,&C->rmap);CHKERRQ(ierr); 1783 ierr = PetscMapCopy(((PetscObject)A)->comm,&A->cmap,&C->cmap);CHKERRQ(ierr); 1784 c->bs2 = a->bs2; 1785 c->mbs = a->mbs; 1786 c->nbs = a->nbs; 1787 1788 ierr = PetscMalloc2((mbs+1),PetscInt,&c->imax,(mbs+1),PetscInt,&c->ilen);CHKERRQ(ierr); 1789 for (i=0; i<mbs; i++) { 1790 c->imax[i] = a->imax[i]; 1791 c->ilen[i] = a->ilen[i]; 1792 } 1793 1794 /* allocate the matrix space */ 1795 ierr = PetscMalloc3(bs2*nz,MatScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 1796 c->singlemalloc = PETSC_TRUE; 1797 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1798 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)));CHKERRQ(ierr); 1799 if (mbs > 0) { 1800 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1801 if (cpvalues == MAT_COPY_VALUES) { 1802 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1803 } else { 1804 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1805 } 1806 } 1807 1808 c->roworiented = a->roworiented; 1809 c->nonew = a->nonew; 1810 1811 if (a->diag) { 1812 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 1813 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1814 for (i=0; i<mbs; i++) { 1815 c->diag[i] = a->diag[i]; 1816 } 1817 } else c->diag = 0; 1818 c->nz = a->nz; 1819 c->maxnz = a->maxnz; 1820 c->solve_work = 0; 1821 c->mult_work = 0; 1822 c->free_a = PETSC_TRUE; 1823 c->free_ij = PETSC_TRUE; 1824 *B = C; 1825 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 1826 PetscFunctionReturn(0); 1827 } 1828 1829 #undef __FUNCT__ 1830 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1831 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer, MatType type,Mat *A) 1832 { 1833 Mat_SeqSBAIJ *a; 1834 Mat B; 1835 PetscErrorCode ierr; 1836 int fd; 1837 PetscMPIInt size; 1838 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 1839 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1840 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 1841 PetscInt *masked,nmask,tmp,bs2,ishift; 1842 PetscScalar *aa; 1843 MPI_Comm comm = ((PetscObject)viewer)->comm; 1844 1845 PetscFunctionBegin; 1846 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1847 bs2 = bs*bs; 1848 1849 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1850 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1851 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1852 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1853 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1854 M = header[1]; N = header[2]; nz = header[3]; 1855 1856 if (header[3] < 0) { 1857 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1858 } 1859 1860 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1861 1862 /* 1863 This code adds extra rows to make sure the number of rows is 1864 divisible by the blocksize 1865 */ 1866 mbs = M/bs; 1867 extra_rows = bs - M + bs*(mbs); 1868 if (extra_rows == bs) extra_rows = 0; 1869 else mbs++; 1870 if (extra_rows) { 1871 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 1872 } 1873 1874 /* read in row lengths */ 1875 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1876 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1877 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1878 1879 /* read in column indices */ 1880 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1881 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1882 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1883 1884 /* loop over row lengths determining block row lengths */ 1885 ierr = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr); 1886 ierr = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1887 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 1888 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1889 masked = mask + mbs; 1890 rowcount = 0; nzcount = 0; 1891 for (i=0; i<mbs; i++) { 1892 nmask = 0; 1893 for (j=0; j<bs; j++) { 1894 kmax = rowlengths[rowcount]; 1895 for (k=0; k<kmax; k++) { 1896 tmp = jj[nzcount++]/bs; /* block col. index */ 1897 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1898 } 1899 rowcount++; 1900 } 1901 s_browlengths[i] += nmask; 1902 1903 /* zero out the mask elements we set */ 1904 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1905 } 1906 1907 /* create our matrix */ 1908 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 1909 ierr = MatSetSizes(B,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 1910 ierr = MatSetType(B,type);CHKERRQ(ierr); 1911 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,s_browlengths);CHKERRQ(ierr); 1912 a = (Mat_SeqSBAIJ*)B->data; 1913 1914 /* set matrix "i" values */ 1915 a->i[0] = 0; 1916 for (i=1; i<= mbs; i++) { 1917 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1918 a->ilen[i-1] = s_browlengths[i-1]; 1919 } 1920 a->nz = a->i[mbs]; 1921 1922 /* read in nonzero values */ 1923 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1924 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1925 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1926 1927 /* set "a" and "j" values into matrix */ 1928 nzcount = 0; jcount = 0; 1929 for (i=0; i<mbs; i++) { 1930 nzcountb = nzcount; 1931 nmask = 0; 1932 for (j=0; j<bs; j++) { 1933 kmax = rowlengths[i*bs+j]; 1934 for (k=0; k<kmax; k++) { 1935 tmp = jj[nzcount++]/bs; /* block col. index */ 1936 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1937 } 1938 } 1939 /* sort the masked values */ 1940 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1941 1942 /* set "j" values into matrix */ 1943 maskcount = 1; 1944 for (j=0; j<nmask; j++) { 1945 a->j[jcount++] = masked[j]; 1946 mask[masked[j]] = maskcount++; 1947 } 1948 1949 /* set "a" values into matrix */ 1950 ishift = bs2*a->i[i]; 1951 for (j=0; j<bs; j++) { 1952 kmax = rowlengths[i*bs+j]; 1953 for (k=0; k<kmax; k++) { 1954 tmp = jj[nzcountb]/bs ; /* block col. index */ 1955 if (tmp >= i){ 1956 block = mask[tmp] - 1; 1957 point = jj[nzcountb] - bs*tmp; 1958 idx = ishift + bs2*block + j + bs*point; 1959 a->a[idx] = aa[nzcountb]; 1960 } 1961 nzcountb++; 1962 } 1963 } 1964 /* zero out the mask elements we set */ 1965 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1966 } 1967 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1968 1969 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1970 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1971 ierr = PetscFree(aa);CHKERRQ(ierr); 1972 ierr = PetscFree(jj);CHKERRQ(ierr); 1973 ierr = PetscFree(mask);CHKERRQ(ierr); 1974 1975 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1976 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1977 ierr = MatView_Private(B);CHKERRQ(ierr); 1978 *A = B; 1979 PetscFunctionReturn(0); 1980 } 1981 1982 #undef __FUNCT__ 1983 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1984 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1985 { 1986 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1987 MatScalar *aa=a->a,*v,*v1; 1988 PetscScalar *x,*b,*t,sum,d; 1989 PetscErrorCode ierr; 1990 PetscInt m=a->mbs,bs=A->rmap.bs,*ai=a->i,*aj=a->j; 1991 PetscInt nz,nz1,*vj,*vj1,i; 1992 1993 PetscFunctionBegin; 1994 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 1995 1996 its = its*lits; 1997 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1998 1999 if (bs > 1) 2000 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 2001 2002 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2003 if (xx != bb) { 2004 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 2005 } else { 2006 b = x; 2007 } 2008 2009 if (!a->relax_work) { 2010 ierr = PetscMalloc(m*sizeof(PetscScalar),&a->relax_work);CHKERRQ(ierr); 2011 } 2012 t = a->relax_work; 2013 2014 if (flag & SOR_ZERO_INITIAL_GUESS) { 2015 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2016 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 2017 2018 for (i=0; i<m; i++){ 2019 d = *(aa + ai[i]); /* diag[i] */ 2020 v = aa + ai[i] + 1; 2021 vj = aj + ai[i] + 1; 2022 nz = ai[i+1] - ai[i] - 1; 2023 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2024 x[i] = omega*t[i]/d; 2025 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 2026 } 2027 } 2028 2029 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2030 if (!(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)){ 2031 t = b; 2032 } 2033 2034 for (i=m-1; i>=0; i--){ 2035 d = *(aa + ai[i]); 2036 v = aa + ai[i] + 1; 2037 vj = aj + ai[i] + 1; 2038 nz = ai[i+1] - ai[i] - 1; 2039 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2040 sum = t[i]; 2041 while (nz--) sum -= x[*vj++]*(*v++); 2042 x[i] = (1-omega)*x[i] + omega*sum/d; 2043 } 2044 t = a->relax_work; 2045 } 2046 its--; 2047 } 2048 2049 while (its--) { 2050 /* 2051 forward sweep: 2052 for i=0,...,m-1: 2053 sum[i] = (b[i] - U(i,:)x )/d[i]; 2054 x[i] = (1-omega)x[i] + omega*sum[i]; 2055 b = b - x[i]*U^T(i,:); 2056 2057 */ 2058 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2059 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 2060 2061 for (i=0; i<m; i++){ 2062 d = *(aa + ai[i]); /* diag[i] */ 2063 v = aa + ai[i] + 1; v1=v; 2064 vj = aj + ai[i] + 1; vj1=vj; 2065 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2066 sum = t[i]; 2067 ierr = PetscLogFlops(4*nz-2);CHKERRQ(ierr); 2068 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2069 x[i] = (1-omega)*x[i] + omega*sum/d; 2070 while (nz--) t[*vj++] -= x[i]*(*v++); 2071 } 2072 } 2073 2074 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2075 /* 2076 backward sweep: 2077 b = b - x[i]*U^T(i,:), i=0,...,n-2 2078 for i=m-1,...,0: 2079 sum[i] = (b[i] - U(i,:)x )/d[i]; 2080 x[i] = (1-omega)x[i] + omega*sum[i]; 2081 */ 2082 /* if there was a forward sweep done above then I thing the next two for loops are not needed */ 2083 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 2084 2085 for (i=0; i<m-1; i++){ /* update rhs */ 2086 v = aa + ai[i] + 1; 2087 vj = aj + ai[i] + 1; 2088 nz = ai[i+1] - ai[i] - 1; 2089 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2090 while (nz--) t[*vj++] -= x[i]*(*v++); 2091 } 2092 for (i=m-1; i>=0; i--){ 2093 d = *(aa + ai[i]); 2094 v = aa + ai[i] + 1; 2095 vj = aj + ai[i] + 1; 2096 nz = ai[i+1] - ai[i] - 1; 2097 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2098 sum = t[i]; 2099 while (nz--) sum -= x[*vj++]*(*v++); 2100 x[i] = (1-omega)*x[i] + omega*sum/d; 2101 } 2102 } 2103 } 2104 2105 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2106 if (bb != xx) { 2107 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2108 } 2109 PetscFunctionReturn(0); 2110 } 2111 2112 #undef __FUNCT__ 2113 #define __FUNCT__ "MatCreateSeqSBAIJWithArrays" 2114 /*@ 2115 MatCreateSeqSBAIJWithArrays - Creates an sequential SBAIJ matrix using matrix elements 2116 (upper triangular entries in CSR format) provided by the user. 2117 2118 Collective on MPI_Comm 2119 2120 Input Parameters: 2121 + comm - must be an MPI communicator of size 1 2122 . bs - size of block 2123 . m - number of rows 2124 . n - number of columns 2125 . i - row indices 2126 . j - column indices 2127 - a - matrix values 2128 2129 Output Parameter: 2130 . mat - the matrix 2131 2132 Level: intermediate 2133 2134 Notes: 2135 The i, j, and a arrays are not copied by this routine, the user must free these arrays 2136 once the matrix is destroyed 2137 2138 You cannot set new nonzero locations into this matrix, that will generate an error. 2139 2140 The i and j indices are 0 based 2141 2142 .seealso: MatCreate(), MatCreateMPISBAIJ(), MatCreateSeqSBAIJ() 2143 2144 @*/ 2145 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 2146 { 2147 PetscErrorCode ierr; 2148 PetscInt ii; 2149 Mat_SeqSBAIJ *sbaij; 2150 2151 PetscFunctionBegin; 2152 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 2153 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2154 2155 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2156 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 2157 ierr = MatSetType(*mat,MATSEQSBAIJ);CHKERRQ(ierr); 2158 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 2159 sbaij = (Mat_SeqSBAIJ*)(*mat)->data; 2160 ierr = PetscMalloc2(m,PetscInt,&sbaij->imax,m,PetscInt,&sbaij->ilen);CHKERRQ(ierr); 2161 2162 sbaij->i = i; 2163 sbaij->j = j; 2164 sbaij->a = a; 2165 sbaij->singlemalloc = PETSC_FALSE; 2166 sbaij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 2167 sbaij->free_a = PETSC_FALSE; 2168 sbaij->free_ij = PETSC_FALSE; 2169 2170 for (ii=0; ii<m; ii++) { 2171 sbaij->ilen[ii] = sbaij->imax[ii] = i[ii+1] - i[ii]; 2172 #if defined(PETSC_USE_DEBUG) 2173 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 2174 #endif 2175 } 2176 #if defined(PETSC_USE_DEBUG) 2177 for (ii=0; ii<sbaij->i[m]; ii++) { 2178 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 2179 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 2180 } 2181 #endif 2182 2183 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2184 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2185 PetscFunctionReturn(0); 2186 } 2187 2188 2189 2190 2191 2192