1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the SBAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/inline/spops.h" 9 #include "src/mat/impls/sbaij/seq/sbaij.h" 10 11 #define CHUNKSIZE 10 12 13 /* 14 Checks for missing diagonals 15 */ 16 #undef __FUNCT__ 17 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 18 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A,PetscTruth *missing,PetscInt *dd) 19 { 20 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 21 PetscErrorCode ierr; 22 PetscInt *diag,*jj = a->j,i; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 *missing = PETSC_FALSE; 28 for (i=0; i<a->mbs; i++) { 29 if (jj[diag[i]] != i) { 30 *missing = PETSC_TRUE; 31 if (dd) *dd = i; 32 break; 33 } 34 } 35 PetscFunctionReturn(0); 36 } 37 38 #undef __FUNCT__ 39 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 40 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A) 41 { 42 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 43 PetscErrorCode ierr; 44 PetscInt i; 45 46 PetscFunctionBegin; 47 if (!a->diag) { 48 ierr = PetscMalloc(a->mbs*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 49 } 50 for (i=0; i<a->mbs; i++) a->diag[i] = a->i[i]; 51 PetscFunctionReturn(0); 52 } 53 54 #undef __FUNCT__ 55 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 56 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 57 { 58 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 59 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs; 60 PetscErrorCode ierr; 61 62 PetscFunctionBegin; 63 *nn = n; 64 if (!ia) PetscFunctionReturn(0); 65 if (!blockcompressed) { 66 /* malloc & create the natural set of indices */ 67 ierr = PetscMalloc2((n+1)*bs,PetscInt,ia,nz*bs,PetscInt,ja);CHKERRQ(ierr); 68 for (i=0; i<n+1; i++) { 69 for (j=0; j<bs; j++) { 70 *ia[i*bs+j] = a->i[i]*bs+j+oshift; 71 } 72 } 73 for (i=0; i<nz; i++) { 74 for (j=0; j<bs; j++) { 75 *ja[i*bs+j] = a->j[i]*bs+j+oshift; 76 } 77 } 78 } else { /* blockcompressed */ 79 if (oshift == 1) { 80 /* temporarily add 1 to i and j indices */ 81 for (i=0; i<nz; i++) a->j[i]++; 82 for (i=0; i<n+1; i++) a->i[i]++; 83 } 84 *ia = a->i; *ja = a->j; 85 } 86 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 92 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 93 { 94 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 95 PetscInt i,n = a->mbs,nz = a->i[n]; 96 PetscErrorCode ierr; 97 98 PetscFunctionBegin; 99 if (!ia) PetscFunctionReturn(0); 100 101 if (!blockcompressed) { 102 ierr = PetscFree2(*ia,*ja);CHKERRQ(ierr); 103 } else if (oshift == 1) { /* blockcompressed */ 104 for (i=0; i<nz; i++) a->j[i]--; 105 for (i=0; i<n+1; i++) a->i[i]--; 106 } 107 108 PetscFunctionReturn(0); 109 } 110 111 #undef __FUNCT__ 112 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 113 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A) 114 { 115 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 116 PetscErrorCode ierr; 117 118 PetscFunctionBegin; 119 #if defined(PETSC_USE_LOG) 120 PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->rmap.N,a->nz); 121 #endif 122 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 123 if (a->row) {ierr = ISDestroy(a->row);CHKERRQ(ierr);} 124 if (a->col){ierr = ISDestroy(a->col);CHKERRQ(ierr);} 125 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 126 ierr = PetscFree(a->diag);CHKERRQ(ierr); 127 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 128 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 129 ierr = PetscFree(a->relax_work);CHKERRQ(ierr); 130 ierr = PetscFree(a->solves_work);CHKERRQ(ierr); 131 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 132 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 133 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 134 135 ierr = PetscFree(a->inew);CHKERRQ(ierr); 136 ierr = PetscFree(a);CHKERRQ(ierr); 137 138 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 139 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 140 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 141 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 142 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 143 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr); 144 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 145 PetscFunctionReturn(0); 146 } 147 148 #undef __FUNCT__ 149 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 150 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op,PetscTruth flg) 151 { 152 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 153 PetscErrorCode ierr; 154 155 PetscFunctionBegin; 156 switch (op) { 157 case MAT_ROW_ORIENTED: 158 a->roworiented = flg; 159 break; 160 case MAT_KEEP_ZEROED_ROWS: 161 a->keepzeroedrows = flg; 162 break; 163 case MAT_NEW_NONZERO_LOCATIONS: 164 a->nonew = (flg ? 0 : 1); 165 break; 166 case MAT_NEW_NONZERO_LOCATION_ERR: 167 a->nonew = (flg ? -1 : 0); 168 break; 169 case MAT_NEW_NONZERO_ALLOCATION_ERR: 170 a->nonew = (flg ? -2 : 0); 171 break; 172 case MAT_NEW_DIAGONALS: 173 case MAT_IGNORE_OFF_PROC_ENTRIES: 174 case MAT_USE_HASH_TABLE: 175 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 176 break; 177 case MAT_HERMITIAN: 178 if (flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 179 case MAT_SYMMETRIC: 180 case MAT_STRUCTURALLY_SYMMETRIC: 181 case MAT_SYMMETRY_ETERNAL: 182 if (!flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 183 ierr = PetscInfo1(A,"Option %s not relevent\n",MatOptions[op]);CHKERRQ(ierr); 184 break; 185 case MAT_IGNORE_LOWER_TRIANGULAR: 186 a->ignore_ltriangular = flg; 187 break; 188 case MAT_ERROR_LOWER_TRIANGULAR: 189 a->ignore_ltriangular = flg; 190 break; 191 case MAT_GETROW_UPPERTRIANGULAR: 192 a->getrow_utriangular = flg; 193 break; 194 default: 195 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 196 } 197 PetscFunctionReturn(0); 198 } 199 200 #undef __FUNCT__ 201 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 202 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v) 203 { 204 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 205 PetscErrorCode ierr; 206 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2; 207 MatScalar *aa,*aa_i; 208 PetscScalar *v_i; 209 210 PetscFunctionBegin; 211 if (A && !a->getrow_utriangular) SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format. Getting the upper triangular part of row, run with -mat_getrow_uppertriangular, call MatSetOption(mat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE) or MatGetRowUpperTriangular()"); 212 /* Get the upper triangular part of the row */ 213 bs = A->rmap.bs; 214 ai = a->i; 215 aj = a->j; 216 aa = a->a; 217 bs2 = a->bs2; 218 219 if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row); 220 221 bn = row/bs; /* Block number */ 222 bp = row % bs; /* Block position */ 223 M = ai[bn+1] - ai[bn]; 224 *ncols = bs*M; 225 226 if (v) { 227 *v = 0; 228 if (*ncols) { 229 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 230 for (i=0; i<M; i++) { /* for each block in the block row */ 231 v_i = *v + i*bs; 232 aa_i = aa + bs2*(ai[bn] + i); 233 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 234 } 235 } 236 } 237 238 if (cols) { 239 *cols = 0; 240 if (*ncols) { 241 ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr); 242 for (i=0; i<M; i++) { /* for each block in the block row */ 243 cols_i = *cols + i*bs; 244 itmp = bs*aj[ai[bn] + i]; 245 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 246 } 247 } 248 } 249 250 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 251 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 252 #ifdef column_search 253 v_i = *v + M*bs; 254 cols_i = *cols + M*bs; 255 for (i=0; i<bn; i++){ /* for each block row */ 256 M = ai[i+1] - ai[i]; 257 for (j=0; j<M; j++){ 258 itmp = aj[ai[i] + j]; /* block column value */ 259 if (itmp == bn){ 260 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 261 for (k=0; k<bs; k++) { 262 *cols_i++ = i*bs+k; 263 *v_i++ = aa_i[k]; 264 } 265 *ncols += bs; 266 break; 267 } 268 } 269 } 270 #endif 271 PetscFunctionReturn(0); 272 } 273 274 #undef __FUNCT__ 275 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 276 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 277 { 278 PetscErrorCode ierr; 279 280 PetscFunctionBegin; 281 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 282 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 283 PetscFunctionReturn(0); 284 } 285 286 #undef __FUNCT__ 287 #define __FUNCT__ "MatGetRowUpperTriangular_SeqSBAIJ" 288 PetscErrorCode MatGetRowUpperTriangular_SeqSBAIJ(Mat A) 289 { 290 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 291 292 PetscFunctionBegin; 293 a->getrow_utriangular = PETSC_TRUE; 294 PetscFunctionReturn(0); 295 } 296 #undef __FUNCT__ 297 #define __FUNCT__ "MatRestoreRowUpperTriangular_SeqSBAIJ" 298 PetscErrorCode MatRestoreRowUpperTriangular_SeqSBAIJ(Mat A) 299 { 300 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 301 302 PetscFunctionBegin; 303 a->getrow_utriangular = PETSC_FALSE; 304 PetscFunctionReturn(0); 305 } 306 307 #undef __FUNCT__ 308 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 309 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B) 310 { 311 PetscErrorCode ierr; 312 PetscFunctionBegin; 313 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 314 PetscFunctionReturn(0); 315 } 316 317 #undef __FUNCT__ 318 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 319 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 320 { 321 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 322 PetscErrorCode ierr; 323 PetscInt i,j,bs = A->rmap.bs,k,l,bs2=a->bs2; 324 const char *name; 325 PetscViewerFormat format; 326 327 PetscFunctionBegin; 328 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 329 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 330 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 331 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 332 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 333 Mat aij; 334 335 if (A->factor && bs>1){ 336 ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: matrix is factored with bs>1. MatView() with PETSC_VIEWER_ASCII_MATLAB is not supported and ignored!\n");CHKERRQ(ierr); 337 PetscFunctionReturn(0); 338 } 339 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 340 ierr = MatView(aij,viewer);CHKERRQ(ierr); 341 ierr = MatDestroy(aij);CHKERRQ(ierr); 342 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 343 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 344 for (i=0; i<a->mbs; i++) { 345 for (j=0; j<bs; j++) { 346 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 347 for (k=a->i[i]; k<a->i[i+1]; k++) { 348 for (l=0; l<bs; l++) { 349 #if defined(PETSC_USE_COMPLEX) 350 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 351 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 352 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 353 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 354 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 355 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 356 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 357 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 358 } 359 #else 360 if (a->a[bs2*k + l*bs + j] != 0.0) { 361 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 362 } 363 #endif 364 } 365 } 366 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 367 } 368 } 369 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 370 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 371 PetscFunctionReturn(0); 372 } else { 373 if (A->factor && bs>1){ 374 ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: matrix is factored. MatView_SeqSBAIJ_ASCII() may not display complete or logically correct entries!\n");CHKERRQ(ierr); 375 } 376 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 377 for (i=0; i<a->mbs; i++) { 378 for (j=0; j<bs; j++) { 379 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 380 for (k=a->i[i]; k<a->i[i+1]; k++) { 381 for (l=0; l<bs; l++) { 382 #if defined(PETSC_USE_COMPLEX) 383 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 384 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 385 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 386 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 387 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 388 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 389 } else { 390 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 391 } 392 #else 393 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 394 #endif 395 } 396 } 397 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 398 } 399 } 400 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 401 } 402 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 403 PetscFunctionReturn(0); 404 } 405 406 #undef __FUNCT__ 407 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 408 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 409 { 410 Mat A = (Mat) Aa; 411 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 412 PetscErrorCode ierr; 413 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2; 414 PetscMPIInt rank; 415 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 416 MatScalar *aa; 417 MPI_Comm comm; 418 PetscViewer viewer; 419 420 PetscFunctionBegin; 421 /* 422 This is nasty. If this is called from an originally parallel matrix 423 then all processes call this,but only the first has the matrix so the 424 rest should return immediately. 425 */ 426 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 427 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 428 if (rank) PetscFunctionReturn(0); 429 430 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 431 432 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 433 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 434 435 /* loop over matrix elements drawing boxes */ 436 color = PETSC_DRAW_BLUE; 437 for (i=0,row=0; i<mbs; i++,row+=bs) { 438 for (j=a->i[i]; j<a->i[i+1]; j++) { 439 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 440 x_l = a->j[j]*bs; x_r = x_l + 1.0; 441 aa = a->a + j*bs2; 442 for (k=0; k<bs; k++) { 443 for (l=0; l<bs; l++) { 444 if (PetscRealPart(*aa++) >= 0.) continue; 445 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 446 } 447 } 448 } 449 } 450 color = PETSC_DRAW_CYAN; 451 for (i=0,row=0; i<mbs; i++,row+=bs) { 452 for (j=a->i[i]; j<a->i[i+1]; j++) { 453 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 454 x_l = a->j[j]*bs; x_r = x_l + 1.0; 455 aa = a->a + j*bs2; 456 for (k=0; k<bs; k++) { 457 for (l=0; l<bs; l++) { 458 if (PetscRealPart(*aa++) != 0.) continue; 459 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 460 } 461 } 462 } 463 } 464 465 color = PETSC_DRAW_RED; 466 for (i=0,row=0; i<mbs; i++,row+=bs) { 467 for (j=a->i[i]; j<a->i[i+1]; j++) { 468 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 469 x_l = a->j[j]*bs; x_r = x_l + 1.0; 470 aa = a->a + j*bs2; 471 for (k=0; k<bs; k++) { 472 for (l=0; l<bs; l++) { 473 if (PetscRealPart(*aa++) <= 0.) continue; 474 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 475 } 476 } 477 } 478 } 479 PetscFunctionReturn(0); 480 } 481 482 #undef __FUNCT__ 483 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 484 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 485 { 486 PetscErrorCode ierr; 487 PetscReal xl,yl,xr,yr,w,h; 488 PetscDraw draw; 489 PetscTruth isnull; 490 491 PetscFunctionBegin; 492 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 493 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 494 495 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 496 xr = A->rmap.N; yr = A->rmap.N; h = yr/10.0; w = xr/10.0; 497 xr += w; yr += h; xl = -w; yl = -h; 498 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 499 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 500 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 501 PetscFunctionReturn(0); 502 } 503 504 #undef __FUNCT__ 505 #define __FUNCT__ "MatView_SeqSBAIJ" 506 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 507 { 508 PetscErrorCode ierr; 509 PetscTruth iascii,isdraw; 510 511 PetscFunctionBegin; 512 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 513 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 514 if (iascii){ 515 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 516 } else if (isdraw) { 517 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 518 } else { 519 Mat B; 520 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 521 ierr = MatView(B,viewer);CHKERRQ(ierr); 522 ierr = MatDestroy(B);CHKERRQ(ierr); 523 } 524 PetscFunctionReturn(0); 525 } 526 527 528 #undef __FUNCT__ 529 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 530 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 531 { 532 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 533 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 534 PetscInt *ai = a->i,*ailen = a->ilen; 535 PetscInt brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2; 536 MatScalar *ap,*aa = a->a; 537 538 PetscFunctionBegin; 539 for (k=0; k<m; k++) { /* loop over rows */ 540 row = im[k]; brow = row/bs; 541 if (row < 0) {v += n; continue;} /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row); */ 542 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 543 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 544 nrow = ailen[brow]; 545 for (l=0; l<n; l++) { /* loop over columns */ 546 if (in[l] < 0) {v++; continue;} /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]); */ 547 if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1); 548 col = in[l] ; 549 bcol = col/bs; 550 cidx = col%bs; 551 ridx = row%bs; 552 high = nrow; 553 low = 0; /* assume unsorted */ 554 while (high-low > 5) { 555 t = (low+high)/2; 556 if (rp[t] > bcol) high = t; 557 else low = t; 558 } 559 for (i=low; i<high; i++) { 560 if (rp[i] > bcol) break; 561 if (rp[i] == bcol) { 562 *v++ = ap[bs2*i+bs*cidx+ridx]; 563 goto finished; 564 } 565 } 566 *v++ = 0.0; 567 finished:; 568 } 569 } 570 PetscFunctionReturn(0); 571 } 572 573 574 #undef __FUNCT__ 575 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 576 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 577 { 578 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 579 PetscErrorCode ierr; 580 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 581 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 582 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval; 583 PetscTruth roworiented=a->roworiented; 584 const MatScalar *value = v; 585 MatScalar *ap,*aa = a->a,*bap; 586 587 PetscFunctionBegin; 588 if (roworiented) { 589 stepval = (n-1)*bs; 590 } else { 591 stepval = (m-1)*bs; 592 } 593 for (k=0; k<m; k++) { /* loop over added rows */ 594 row = im[k]; 595 if (row < 0) continue; 596 #if defined(PETSC_USE_DEBUG) 597 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 598 #endif 599 rp = aj + ai[row]; 600 ap = aa + bs2*ai[row]; 601 rmax = imax[row]; 602 nrow = ailen[row]; 603 low = 0; 604 high = nrow; 605 for (l=0; l<n; l++) { /* loop over added columns */ 606 if (in[l] < 0) continue; 607 col = in[l]; 608 #if defined(PETSC_USE_DEBUG) 609 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1); 610 #endif 611 if (col < row) continue; /* ignore lower triangular block */ 612 if (roworiented) { 613 value = v + k*(stepval+bs)*bs + l*bs; 614 } else { 615 value = v + l*(stepval+bs)*bs + k*bs; 616 } 617 if (col <= lastcol) low = 0; else high = nrow; 618 lastcol = col; 619 while (high-low > 7) { 620 t = (low+high)/2; 621 if (rp[t] > col) high = t; 622 else low = t; 623 } 624 for (i=low; i<high; i++) { 625 if (rp[i] > col) break; 626 if (rp[i] == col) { 627 bap = ap + bs2*i; 628 if (roworiented) { 629 if (is == ADD_VALUES) { 630 for (ii=0; ii<bs; ii++,value+=stepval) { 631 for (jj=ii; jj<bs2; jj+=bs) { 632 bap[jj] += *value++; 633 } 634 } 635 } else { 636 for (ii=0; ii<bs; ii++,value+=stepval) { 637 for (jj=ii; jj<bs2; jj+=bs) { 638 bap[jj] = *value++; 639 } 640 } 641 } 642 } else { 643 if (is == ADD_VALUES) { 644 for (ii=0; ii<bs; ii++,value+=stepval) { 645 for (jj=0; jj<bs; jj++) { 646 *bap++ += *value++; 647 } 648 } 649 } else { 650 for (ii=0; ii<bs; ii++,value+=stepval) { 651 for (jj=0; jj<bs; jj++) { 652 *bap++ = *value++; 653 } 654 } 655 } 656 } 657 goto noinsert2; 658 } 659 } 660 if (nonew == 1) goto noinsert2; 661 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 662 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 663 N = nrow++ - 1; high++; 664 /* shift up all the later entries in this row */ 665 for (ii=N; ii>=i; ii--) { 666 rp[ii+1] = rp[ii]; 667 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 668 } 669 if (N >= i) { 670 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 671 } 672 rp[i] = col; 673 bap = ap + bs2*i; 674 if (roworiented) { 675 for (ii=0; ii<bs; ii++,value+=stepval) { 676 for (jj=ii; jj<bs2; jj+=bs) { 677 bap[jj] = *value++; 678 } 679 } 680 } else { 681 for (ii=0; ii<bs; ii++,value+=stepval) { 682 for (jj=0; jj<bs; jj++) { 683 *bap++ = *value++; 684 } 685 } 686 } 687 noinsert2:; 688 low = i; 689 } 690 ailen[row] = nrow; 691 } 692 PetscFunctionReturn(0); 693 } 694 695 #undef __FUNCT__ 696 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 697 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 698 { 699 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 700 PetscErrorCode ierr; 701 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 702 PetscInt m = A->rmap.N,*ip,N,*ailen = a->ilen; 703 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 704 MatScalar *aa = a->a,*ap; 705 706 PetscFunctionBegin; 707 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 708 709 if (m) rmax = ailen[0]; 710 for (i=1; i<mbs; i++) { 711 /* move each row back by the amount of empty slots (fshift) before it*/ 712 fshift += imax[i-1] - ailen[i-1]; 713 rmax = PetscMax(rmax,ailen[i]); 714 if (fshift) { 715 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 716 N = ailen[i]; 717 for (j=0; j<N; j++) { 718 ip[j-fshift] = ip[j]; 719 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 720 } 721 } 722 ai[i] = ai[i-1] + ailen[i-1]; 723 } 724 if (mbs) { 725 fshift += imax[mbs-1] - ailen[mbs-1]; 726 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 727 } 728 /* reset ilen and imax for each row */ 729 for (i=0; i<mbs; i++) { 730 ailen[i] = imax[i] = ai[i+1] - ai[i]; 731 } 732 a->nz = ai[mbs]; 733 734 /* diagonals may have moved, reset it */ 735 if (a->diag) { 736 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 737 } 738 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->rmap.N,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 739 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 740 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 741 a->reallocs = 0; 742 A->info.nz_unneeded = (PetscReal)fshift*bs2; 743 PetscFunctionReturn(0); 744 } 745 746 /* 747 This function returns an array of flags which indicate the locations of contiguous 748 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 749 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 750 Assume: sizes should be long enough to hold all the values. 751 */ 752 #undef __FUNCT__ 753 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 754 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 755 { 756 PetscInt i,j,k,row; 757 PetscTruth flg; 758 759 PetscFunctionBegin; 760 for (i=0,j=0; i<n; j++) { 761 row = idx[i]; 762 if (row%bs!=0) { /* Not the begining of a block */ 763 sizes[j] = 1; 764 i++; 765 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 766 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 767 i++; 768 } else { /* Begining of the block, so check if the complete block exists */ 769 flg = PETSC_TRUE; 770 for (k=1; k<bs; k++) { 771 if (row+k != idx[i+k]) { /* break in the block */ 772 flg = PETSC_FALSE; 773 break; 774 } 775 } 776 if (flg) { /* No break in the bs */ 777 sizes[j] = bs; 778 i+= bs; 779 } else { 780 sizes[j] = 1; 781 i++; 782 } 783 } 784 } 785 *bs_max = j; 786 PetscFunctionReturn(0); 787 } 788 789 790 /* Only add/insert a(i,j) with i<=j (blocks). 791 Any a(i,j) with i>j input by user is ingored. 792 */ 793 794 #undef __FUNCT__ 795 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 796 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 797 { 798 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 799 PetscErrorCode ierr; 800 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 801 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 802 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol; 803 PetscInt ridx,cidx,bs2=a->bs2; 804 MatScalar *ap,value,*aa=a->a,*bap; 805 806 PetscFunctionBegin; 807 for (k=0; k<m; k++) { /* loop over added rows */ 808 row = im[k]; /* row number */ 809 brow = row/bs; /* block row number */ 810 if (row < 0) continue; 811 #if defined(PETSC_USE_DEBUG) 812 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 813 #endif 814 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 815 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 816 rmax = imax[brow]; /* maximum space allocated for this row */ 817 nrow = ailen[brow]; /* actual length of this row */ 818 low = 0; 819 820 for (l=0; l<n; l++) { /* loop over added columns */ 821 if (in[l] < 0) continue; 822 #if defined(PETSC_USE_DEBUG) 823 if (in[l] >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->rmap.N-1); 824 #endif 825 col = in[l]; 826 bcol = col/bs; /* block col number */ 827 828 if (brow > bcol) { 829 if (a->ignore_ltriangular){ 830 continue; /* ignore lower triangular values */ 831 } else { 832 SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)"); 833 } 834 } 835 836 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 837 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 838 /* element value a(k,l) */ 839 if (roworiented) { 840 value = v[l + k*n]; 841 } else { 842 value = v[k + l*m]; 843 } 844 845 /* move pointer bap to a(k,l) quickly and add/insert value */ 846 if (col <= lastcol) low = 0; high = nrow; 847 lastcol = col; 848 while (high-low > 7) { 849 t = (low+high)/2; 850 if (rp[t] > bcol) high = t; 851 else low = t; 852 } 853 for (i=low; i<high; i++) { 854 if (rp[i] > bcol) break; 855 if (rp[i] == bcol) { 856 bap = ap + bs2*i + bs*cidx + ridx; 857 if (is == ADD_VALUES) *bap += value; 858 else *bap = value; 859 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 860 if (brow == bcol && ridx < cidx){ 861 bap = ap + bs2*i + bs*ridx + cidx; 862 if (is == ADD_VALUES) *bap += value; 863 else *bap = value; 864 } 865 goto noinsert1; 866 } 867 } 868 869 if (nonew == 1) goto noinsert1; 870 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 871 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 872 873 N = nrow++ - 1; high++; 874 /* shift up all the later entries in this row */ 875 for (ii=N; ii>=i; ii--) { 876 rp[ii+1] = rp[ii]; 877 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 878 } 879 if (N>=i) { 880 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 881 } 882 rp[i] = bcol; 883 ap[bs2*i + bs*cidx + ridx] = value; 884 noinsert1:; 885 low = i; 886 } 887 } /* end of loop over added columns */ 888 ailen[brow] = nrow; 889 } /* end of loop over added rows */ 890 PetscFunctionReturn(0); 891 } 892 893 #undef __FUNCT__ 894 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 895 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 896 { 897 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 898 Mat outA; 899 PetscErrorCode ierr; 900 PetscTruth row_identity; 901 902 PetscFunctionBegin; 903 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels=0 is supported for in-place icc"); 904 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 905 if (!row_identity) SETERRQ(PETSC_ERR_SUP,"Matrix reordering is not supported"); 906 if (inA->rmap.bs != 1) SETERRQ1(PETSC_ERR_SUP,"Matrix block size %D is not supported",inA->rmap.bs); /* Need to replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR()! */ 907 908 outA = inA; 909 inA->factor = FACTOR_CHOLESKY; 910 911 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 912 /* 913 Blocksize < 8 have a special faster factorization/solver 914 for ICC(0) factorization with natural ordering 915 */ 916 switch (inA->rmap.bs){ /* Note: row_identity = PETSC_TRUE! */ 917 case 1: 918 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 919 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 920 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 921 inA->ops->solves = MatSolves_SeqSBAIJ_1; 922 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 923 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 924 ierr = PetscInfo(inA,"Using special in-place natural ordering solvetrans BS=1\n");CHKERRQ(ierr); 925 break; 926 case 2: 927 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 928 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 929 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 930 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering; 931 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering; 932 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=2\n");CHKERRQ(ierr); 933 break; 934 case 3: 935 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 936 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 937 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 938 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_3_NaturalOrdering; 939 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering; 940 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=3\n");CHKERRQ(ierr); 941 break; 942 case 4: 943 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 944 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 945 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 946 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_4_NaturalOrdering; 947 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering; 948 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=4\n");CHKERRQ(ierr); 949 break; 950 case 5: 951 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 952 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 953 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 954 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_5_NaturalOrdering; 955 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering; 956 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=5\n");CHKERRQ(ierr); 957 break; 958 case 6: 959 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 960 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 961 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 962 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_6_NaturalOrdering; 963 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering; 964 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=6\n");CHKERRQ(ierr); 965 break; 966 case 7: 967 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 968 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 969 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 970 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_7_NaturalOrdering; 971 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering; 972 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=7\n");CHKERRQ(ierr); 973 break; 974 default: 975 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 976 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering; 977 inA->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 978 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering; 979 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering; 980 break; 981 } 982 983 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 984 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 985 a->row = row; 986 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 987 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 988 a->col = row; 989 990 /* Create the invert permutation so that it can be used in MatCholeskyFactorNumeric() */ 991 if (a->icol) {ierr = ISInvertPermutation(row,PETSC_DECIDE, &a->icol);CHKERRQ(ierr);} 992 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 993 994 if (!a->solve_work) { 995 ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 996 ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr); 997 } 998 999 ierr = MatCholeskyFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1000 PetscFunctionReturn(0); 1001 } 1002 1003 EXTERN_C_BEGIN 1004 #undef __FUNCT__ 1005 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1006 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices) 1007 { 1008 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1009 PetscInt i,nz,n; 1010 1011 PetscFunctionBegin; 1012 nz = baij->maxnz; 1013 n = mat->cmap.n; 1014 for (i=0; i<nz; i++) { 1015 baij->j[i] = indices[i]; 1016 } 1017 baij->nz = nz; 1018 for (i=0; i<n; i++) { 1019 baij->ilen[i] = baij->imax[i]; 1020 } 1021 PetscFunctionReturn(0); 1022 } 1023 EXTERN_C_END 1024 1025 #undef __FUNCT__ 1026 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1027 /*@ 1028 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1029 in the matrix. 1030 1031 Input Parameters: 1032 + mat - the SeqSBAIJ matrix 1033 - indices - the column indices 1034 1035 Level: advanced 1036 1037 Notes: 1038 This can be called if you have precomputed the nonzero structure of the 1039 matrix and want to provide it to the matrix object to improve the performance 1040 of the MatSetValues() operation. 1041 1042 You MUST have set the correct numbers of nonzeros per row in the call to 1043 MatCreateSeqSBAIJ(), and the columns indices MUST be sorted. 1044 1045 MUST be called before any calls to MatSetValues() 1046 1047 .seealso: MatCreateSeqSBAIJ 1048 @*/ 1049 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1050 { 1051 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1052 1053 PetscFunctionBegin; 1054 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1055 PetscValidPointer(indices,2); 1056 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1057 if (f) { 1058 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1059 } else { 1060 SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices"); 1061 } 1062 PetscFunctionReturn(0); 1063 } 1064 1065 #undef __FUNCT__ 1066 #define __FUNCT__ "MatCopy_SeqSBAIJ" 1067 PetscErrorCode MatCopy_SeqSBAIJ(Mat A,Mat B,MatStructure str) 1068 { 1069 PetscErrorCode ierr; 1070 1071 PetscFunctionBegin; 1072 /* If the two matrices have the same copy implementation, use fast copy. */ 1073 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1074 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1075 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1076 1077 if (a->i[A->rmap.N] != b->i[B->rmap.N]) { 1078 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1079 } 1080 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr); 1081 } else { 1082 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1083 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1084 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1085 } 1086 PetscFunctionReturn(0); 1087 } 1088 1089 #undef __FUNCT__ 1090 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1091 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A) 1092 { 1093 PetscErrorCode ierr; 1094 1095 PetscFunctionBegin; 1096 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 1097 PetscFunctionReturn(0); 1098 } 1099 1100 #undef __FUNCT__ 1101 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1102 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1103 { 1104 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1105 PetscFunctionBegin; 1106 *array = a->a; 1107 PetscFunctionReturn(0); 1108 } 1109 1110 #undef __FUNCT__ 1111 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1112 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1113 { 1114 PetscFunctionBegin; 1115 PetscFunctionReturn(0); 1116 } 1117 1118 #include "petscblaslapack.h" 1119 #undef __FUNCT__ 1120 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1121 PetscErrorCode MatAXPY_SeqSBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1122 { 1123 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1124 PetscErrorCode ierr; 1125 PetscInt i,bs=Y->rmap.bs,bs2,j; 1126 PetscBLASInt bnz = (PetscBLASInt)x->nz,one = 1; 1127 1128 PetscFunctionBegin; 1129 if (str == SAME_NONZERO_PATTERN) { 1130 PetscScalar alpha = a; 1131 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1132 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1133 if (y->xtoy && y->XtoY != X) { 1134 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1135 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1136 } 1137 if (!y->xtoy) { /* get xtoy */ 1138 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1139 y->XtoY = X; 1140 } 1141 bs2 = bs*bs; 1142 for (i=0; i<x->nz; i++) { 1143 j = 0; 1144 while (j < bs2){ 1145 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 1146 j++; 1147 } 1148 } 1149 ierr = PetscInfo3(0,"ratio of nnz_s(X)/nnz_s(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 1150 } else { 1151 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1152 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1153 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1154 } 1155 PetscFunctionReturn(0); 1156 } 1157 1158 #undef __FUNCT__ 1159 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1160 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1161 { 1162 PetscFunctionBegin; 1163 *flg = PETSC_TRUE; 1164 PetscFunctionReturn(0); 1165 } 1166 1167 #undef __FUNCT__ 1168 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1169 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1170 { 1171 PetscFunctionBegin; 1172 *flg = PETSC_TRUE; 1173 PetscFunctionReturn(0); 1174 } 1175 1176 #undef __FUNCT__ 1177 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1178 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1179 { 1180 PetscFunctionBegin; 1181 *flg = PETSC_FALSE; 1182 PetscFunctionReturn(0); 1183 } 1184 1185 #undef __FUNCT__ 1186 #define __FUNCT__ "MatRealPart_SeqSBAIJ" 1187 PetscErrorCode MatRealPart_SeqSBAIJ(Mat A) 1188 { 1189 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1190 PetscInt i,nz = a->bs2*a->i[a->mbs]; 1191 PetscScalar *aa = a->a; 1192 1193 PetscFunctionBegin; 1194 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 1195 PetscFunctionReturn(0); 1196 } 1197 1198 #undef __FUNCT__ 1199 #define __FUNCT__ "MatImaginaryPart_SeqSBAIJ" 1200 PetscErrorCode MatImaginaryPart_SeqSBAIJ(Mat A) 1201 { 1202 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1203 PetscInt i,nz = a->bs2*a->i[a->mbs]; 1204 PetscScalar *aa = a->a; 1205 1206 PetscFunctionBegin; 1207 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 1208 PetscFunctionReturn(0); 1209 } 1210 1211 /* -------------------------------------------------------------------*/ 1212 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1213 MatGetRow_SeqSBAIJ, 1214 MatRestoreRow_SeqSBAIJ, 1215 MatMult_SeqSBAIJ_N, 1216 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1217 MatMult_SeqSBAIJ_N, /* transpose versions are same as non-transpose versions */ 1218 MatMultAdd_SeqSBAIJ_N, 1219 MatSolve_SeqSBAIJ_N, 1220 0, 1221 0, 1222 /*10*/ 0, 1223 0, 1224 MatCholeskyFactor_SeqSBAIJ, 1225 MatRelax_SeqSBAIJ, 1226 MatTranspose_SeqSBAIJ, 1227 /*15*/ MatGetInfo_SeqSBAIJ, 1228 MatEqual_SeqSBAIJ, 1229 MatGetDiagonal_SeqSBAIJ, 1230 MatDiagonalScale_SeqSBAIJ, 1231 MatNorm_SeqSBAIJ, 1232 /*20*/ 0, 1233 MatAssemblyEnd_SeqSBAIJ, 1234 0, 1235 MatSetOption_SeqSBAIJ, 1236 MatZeroEntries_SeqSBAIJ, 1237 /*25*/ 0, 1238 0, 1239 0, 1240 MatCholeskyFactorSymbolic_SeqSBAIJ, 1241 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1242 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1243 0, 1244 MatICCFactorSymbolic_SeqSBAIJ, 1245 MatGetArray_SeqSBAIJ, 1246 MatRestoreArray_SeqSBAIJ, 1247 /*35*/ MatDuplicate_SeqSBAIJ, 1248 MatForwardSolve_SeqSBAIJ_N, 1249 MatBackwardSolve_SeqSBAIJ_N, 1250 0, 1251 MatICCFactor_SeqSBAIJ, 1252 /*40*/ MatAXPY_SeqSBAIJ, 1253 MatGetSubMatrices_SeqSBAIJ, 1254 MatIncreaseOverlap_SeqSBAIJ, 1255 MatGetValues_SeqSBAIJ, 1256 MatCopy_SeqSBAIJ, 1257 /*45*/ 0, 1258 MatScale_SeqSBAIJ, 1259 0, 1260 0, 1261 0, 1262 /*50*/ 0, 1263 MatGetRowIJ_SeqSBAIJ, 1264 MatRestoreRowIJ_SeqSBAIJ, 1265 0, 1266 0, 1267 /*55*/ 0, 1268 0, 1269 0, 1270 0, 1271 MatSetValuesBlocked_SeqSBAIJ, 1272 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1273 0, 1274 0, 1275 0, 1276 0, 1277 /*65*/ 0, 1278 0, 1279 0, 1280 0, 1281 0, 1282 /*70*/ MatGetRowMaxAbs_SeqSBAIJ, 1283 0, 1284 0, 1285 0, 1286 0, 1287 /*75*/ 0, 1288 0, 1289 0, 1290 0, 1291 0, 1292 /*80*/ 0, 1293 0, 1294 0, 1295 #if !defined(PETSC_USE_COMPLEX) 1296 MatGetInertia_SeqSBAIJ, 1297 #else 1298 0, 1299 #endif 1300 MatLoad_SeqSBAIJ, 1301 /*85*/ MatIsSymmetric_SeqSBAIJ, 1302 MatIsHermitian_SeqSBAIJ, 1303 MatIsStructurallySymmetric_SeqSBAIJ, 1304 0, 1305 0, 1306 /*90*/ 0, 1307 0, 1308 0, 1309 0, 1310 0, 1311 /*95*/ 0, 1312 0, 1313 0, 1314 0, 1315 0, 1316 /*100*/0, 1317 0, 1318 0, 1319 0, 1320 0, 1321 /*105*/0, 1322 MatRealPart_SeqSBAIJ, 1323 MatImaginaryPart_SeqSBAIJ, 1324 MatGetRowUpperTriangular_SeqSBAIJ, 1325 MatRestoreRowUpperTriangular_SeqSBAIJ, 1326 /*110*/0, 1327 0, 1328 0, 1329 0, 1330 MatMissingDiagonal_SeqSBAIJ 1331 }; 1332 1333 EXTERN_C_BEGIN 1334 #undef __FUNCT__ 1335 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1336 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqSBAIJ(Mat mat) 1337 { 1338 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1339 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 1340 PetscErrorCode ierr; 1341 1342 PetscFunctionBegin; 1343 if (aij->nonew != 1) { 1344 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1345 } 1346 1347 /* allocate space for values if not already there */ 1348 if (!aij->saved_values) { 1349 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1350 } 1351 1352 /* copy values over */ 1353 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1354 PetscFunctionReturn(0); 1355 } 1356 EXTERN_C_END 1357 1358 EXTERN_C_BEGIN 1359 #undef __FUNCT__ 1360 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1361 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqSBAIJ(Mat mat) 1362 { 1363 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1364 PetscErrorCode ierr; 1365 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 1366 1367 PetscFunctionBegin; 1368 if (aij->nonew != 1) { 1369 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 1370 } 1371 if (!aij->saved_values) { 1372 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1373 } 1374 1375 /* copy values over */ 1376 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1377 PetscFunctionReturn(0); 1378 } 1379 EXTERN_C_END 1380 1381 EXTERN_C_BEGIN 1382 #undef __FUNCT__ 1383 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1384 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 1385 { 1386 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1387 PetscErrorCode ierr; 1388 PetscInt i,mbs,bs2; 1389 PetscTruth skipallocation = PETSC_FALSE,flg; 1390 1391 PetscFunctionBegin; 1392 B->preallocated = PETSC_TRUE; 1393 ierr = PetscOptionsGetInt(((PetscObject)B)->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1394 B->rmap.bs = B->cmap.bs = bs; 1395 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 1396 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 1397 1398 mbs = B->rmap.N/bs; 1399 bs2 = bs*bs; 1400 1401 if (mbs*bs != B->rmap.N) { 1402 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1403 } 1404 1405 if (nz == MAT_SKIP_ALLOCATION) { 1406 skipallocation = PETSC_TRUE; 1407 nz = 0; 1408 } 1409 1410 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1411 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 1412 if (nnz) { 1413 for (i=0; i<mbs; i++) { 1414 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 1415 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs); 1416 } 1417 } 1418 1419 ierr = PetscOptionsHasName(((PetscObject)B)->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1420 if (!flg) { 1421 switch (bs) { 1422 case 1: 1423 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1424 B->ops->solve = MatSolve_SeqSBAIJ_1; 1425 B->ops->solves = MatSolves_SeqSBAIJ_1; 1426 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1427 B->ops->mult = MatMult_SeqSBAIJ_1; 1428 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1429 B->ops->multtranspose = MatMult_SeqSBAIJ_1; 1430 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_1; 1431 break; 1432 case 2: 1433 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1434 B->ops->solve = MatSolve_SeqSBAIJ_2; 1435 B->ops->solvetranspose = MatSolve_SeqSBAIJ_2; 1436 B->ops->mult = MatMult_SeqSBAIJ_2; 1437 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1438 B->ops->multtranspose = MatMult_SeqSBAIJ_2; 1439 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_2; 1440 break; 1441 case 3: 1442 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1443 B->ops->solve = MatSolve_SeqSBAIJ_3; 1444 B->ops->solvetranspose = MatSolve_SeqSBAIJ_3; 1445 B->ops->mult = MatMult_SeqSBAIJ_3; 1446 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1447 B->ops->multtranspose = MatMult_SeqSBAIJ_3; 1448 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_3; 1449 break; 1450 case 4: 1451 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1452 B->ops->solve = MatSolve_SeqSBAIJ_4; 1453 B->ops->solvetranspose = MatSolve_SeqSBAIJ_4; 1454 B->ops->mult = MatMult_SeqSBAIJ_4; 1455 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1456 B->ops->multtranspose = MatMult_SeqSBAIJ_4; 1457 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_4; 1458 break; 1459 case 5: 1460 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1461 B->ops->solve = MatSolve_SeqSBAIJ_5; 1462 B->ops->solvetranspose = MatSolve_SeqSBAIJ_5; 1463 B->ops->mult = MatMult_SeqSBAIJ_5; 1464 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1465 B->ops->multtranspose = MatMult_SeqSBAIJ_5; 1466 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_5; 1467 break; 1468 case 6: 1469 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1470 B->ops->solve = MatSolve_SeqSBAIJ_6; 1471 B->ops->solvetranspose = MatSolve_SeqSBAIJ_6; 1472 B->ops->mult = MatMult_SeqSBAIJ_6; 1473 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1474 B->ops->multtranspose = MatMult_SeqSBAIJ_6; 1475 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_6; 1476 break; 1477 case 7: 1478 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1479 B->ops->solve = MatSolve_SeqSBAIJ_7; 1480 B->ops->solvetranspose = MatSolve_SeqSBAIJ_7; 1481 B->ops->mult = MatMult_SeqSBAIJ_7; 1482 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1483 B->ops->multtranspose = MatMult_SeqSBAIJ_7; 1484 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_7; 1485 break; 1486 } 1487 } 1488 1489 b->mbs = mbs; 1490 b->nbs = mbs; 1491 if (!skipallocation) { 1492 /* b->ilen will count nonzeros in each block row so far. */ 1493 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 1494 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1495 if (!nnz) { 1496 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1497 else if (nz <= 0) nz = 1; 1498 for (i=0; i<mbs; i++) { 1499 b->imax[i] = nz; 1500 } 1501 nz = nz*mbs; /* total nz */ 1502 } else { 1503 nz = 0; 1504 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1505 } 1506 /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1507 1508 /* allocate the matrix space */ 1509 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr); 1510 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1511 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1512 b->singlemalloc = PETSC_TRUE; 1513 1514 /* pointer to beginning of each row */ 1515 b->i[0] = 0; 1516 for (i=1; i<mbs+1; i++) { 1517 b->i[i] = b->i[i-1] + b->imax[i-1]; 1518 } 1519 b->free_a = PETSC_TRUE; 1520 b->free_ij = PETSC_TRUE; 1521 } else { 1522 b->free_a = PETSC_FALSE; 1523 b->free_ij = PETSC_FALSE; 1524 } 1525 1526 B->rmap.bs = bs; 1527 b->bs2 = bs2; 1528 b->nz = 0; 1529 b->maxnz = nz*bs2; 1530 1531 b->inew = 0; 1532 b->jnew = 0; 1533 b->anew = 0; 1534 b->a2anew = 0; 1535 b->permute = PETSC_FALSE; 1536 PetscFunctionReturn(0); 1537 } 1538 EXTERN_C_END 1539 1540 EXTERN_C_BEGIN 1541 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 1542 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqBAIJ(Mat, MatType,MatReuse,Mat*); 1543 EXTERN_C_END 1544 1545 /*MC 1546 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1547 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1548 1549 Options Database Keys: 1550 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1551 1552 Level: beginner 1553 1554 .seealso: MatCreateSeqSBAIJ 1555 M*/ 1556 1557 EXTERN_C_BEGIN 1558 #undef __FUNCT__ 1559 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1560 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqSBAIJ(Mat B) 1561 { 1562 Mat_SeqSBAIJ *b; 1563 PetscErrorCode ierr; 1564 PetscMPIInt size; 1565 PetscTruth flg; 1566 1567 PetscFunctionBegin; 1568 ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr); 1569 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1570 1571 ierr = PetscNewLog(B,Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1572 B->data = (void*)b; 1573 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1574 B->ops->destroy = MatDestroy_SeqSBAIJ; 1575 B->ops->view = MatView_SeqSBAIJ; 1576 B->factor = 0; 1577 B->mapping = 0; 1578 b->row = 0; 1579 b->icol = 0; 1580 b->reallocs = 0; 1581 b->saved_values = 0; 1582 1583 1584 b->roworiented = PETSC_TRUE; 1585 b->nonew = 0; 1586 b->diag = 0; 1587 b->solve_work = 0; 1588 b->mult_work = 0; 1589 B->spptr = 0; 1590 b->keepzeroedrows = PETSC_FALSE; 1591 b->xtoy = 0; 1592 b->XtoY = 0; 1593 1594 b->inew = 0; 1595 b->jnew = 0; 1596 b->anew = 0; 1597 b->a2anew = 0; 1598 b->permute = PETSC_FALSE; 1599 1600 b->ignore_ltriangular = PETSC_FALSE; 1601 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_ignore_lower_triangular",&flg);CHKERRQ(ierr); 1602 if (flg) b->ignore_ltriangular = PETSC_TRUE; 1603 1604 b->getrow_utriangular = PETSC_FALSE; 1605 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_getrow_uppertriangular",&flg);CHKERRQ(ierr); 1606 if (flg) b->getrow_utriangular = PETSC_TRUE; 1607 1608 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1609 "MatStoreValues_SeqSBAIJ", 1610 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1611 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1612 "MatRetrieveValues_SeqSBAIJ", 1613 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1614 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1615 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1616 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1617 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C", 1618 "MatConvert_SeqSBAIJ_SeqAIJ", 1619 MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr); 1620 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C", 1621 "MatConvert_SeqSBAIJ_SeqBAIJ", 1622 MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr); 1623 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1624 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1625 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1626 1627 B->symmetric = PETSC_TRUE; 1628 B->structurally_symmetric = PETSC_TRUE; 1629 B->symmetric_set = PETSC_TRUE; 1630 B->structurally_symmetric_set = PETSC_TRUE; 1631 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSBAIJ);CHKERRQ(ierr); 1632 PetscFunctionReturn(0); 1633 } 1634 EXTERN_C_END 1635 1636 #undef __FUNCT__ 1637 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1638 /*@C 1639 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1640 compressed row) format. For good matrix assembly performance the 1641 user should preallocate the matrix storage by setting the parameter nz 1642 (or the array nnz). By setting these parameters accurately, performance 1643 during matrix assembly can be increased by more than a factor of 50. 1644 1645 Collective on Mat 1646 1647 Input Parameters: 1648 + A - the symmetric matrix 1649 . bs - size of block 1650 . nz - number of block nonzeros per block row (same for all rows) 1651 - nnz - array containing the number of block nonzeros in the upper triangular plus 1652 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1653 1654 Options Database Keys: 1655 . -mat_no_unroll - uses code that does not unroll the loops in the 1656 block calculations (much slower) 1657 . -mat_block_size - size of the blocks to use 1658 1659 Level: intermediate 1660 1661 Notes: 1662 Specify the preallocated storage with either nz or nnz (not both). 1663 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1664 allocation. For additional details, see the users manual chapter on 1665 matrices. 1666 1667 If the nnz parameter is given then the nz parameter is ignored 1668 1669 1670 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1671 @*/ 1672 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 1673 { 1674 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 1675 1676 PetscFunctionBegin; 1677 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1678 if (f) { 1679 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1680 } 1681 PetscFunctionReturn(0); 1682 } 1683 1684 #undef __FUNCT__ 1685 #define __FUNCT__ "MatCreateSeqSBAIJ" 1686 /*@C 1687 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1688 compressed row) format. For good matrix assembly performance the 1689 user should preallocate the matrix storage by setting the parameter nz 1690 (or the array nnz). By setting these parameters accurately, performance 1691 during matrix assembly can be increased by more than a factor of 50. 1692 1693 Collective on MPI_Comm 1694 1695 Input Parameters: 1696 + comm - MPI communicator, set to PETSC_COMM_SELF 1697 . bs - size of block 1698 . m - number of rows, or number of columns 1699 . nz - number of block nonzeros per block row (same for all rows) 1700 - nnz - array containing the number of block nonzeros in the upper triangular plus 1701 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1702 1703 Output Parameter: 1704 . A - the symmetric matrix 1705 1706 Options Database Keys: 1707 . -mat_no_unroll - uses code that does not unroll the loops in the 1708 block calculations (much slower) 1709 . -mat_block_size - size of the blocks to use 1710 1711 Level: intermediate 1712 1713 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 1714 MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely 1715 true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles. 1716 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 1717 1718 Notes: 1719 The number of rows and columns must be divisible by blocksize. 1720 This matrix type does not support complex Hermitian operation. 1721 1722 Specify the preallocated storage with either nz or nnz (not both). 1723 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1724 allocation. For additional details, see the users manual chapter on 1725 matrices. 1726 1727 If the nnz parameter is given then the nz parameter is ignored 1728 1729 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1730 @*/ 1731 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1732 { 1733 PetscErrorCode ierr; 1734 1735 PetscFunctionBegin; 1736 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1737 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 1738 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1739 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 1740 PetscFunctionReturn(0); 1741 } 1742 1743 #undef __FUNCT__ 1744 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1745 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1746 { 1747 Mat C; 1748 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1749 PetscErrorCode ierr; 1750 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1751 1752 PetscFunctionBegin; 1753 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1754 1755 *B = 0; 1756 ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr); 1757 ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr); 1758 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1759 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1760 c = (Mat_SeqSBAIJ*)C->data; 1761 1762 C->preallocated = PETSC_TRUE; 1763 C->factor = A->factor; 1764 c->row = 0; 1765 c->icol = 0; 1766 c->saved_values = 0; 1767 c->keepzeroedrows = a->keepzeroedrows; 1768 C->assembled = PETSC_TRUE; 1769 1770 ierr = PetscMapCopy(((PetscObject)A)->comm,&A->rmap,&C->rmap);CHKERRQ(ierr); 1771 ierr = PetscMapCopy(((PetscObject)A)->comm,&A->cmap,&C->cmap);CHKERRQ(ierr); 1772 c->bs2 = a->bs2; 1773 c->mbs = a->mbs; 1774 c->nbs = a->nbs; 1775 1776 ierr = PetscMalloc2((mbs+1),PetscInt,&c->imax,(mbs+1),PetscInt,&c->ilen);CHKERRQ(ierr); 1777 for (i=0; i<mbs; i++) { 1778 c->imax[i] = a->imax[i]; 1779 c->ilen[i] = a->ilen[i]; 1780 } 1781 1782 /* allocate the matrix space */ 1783 ierr = PetscMalloc3(bs2*nz,MatScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 1784 c->singlemalloc = PETSC_TRUE; 1785 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1786 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)));CHKERRQ(ierr); 1787 if (mbs > 0) { 1788 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1789 if (cpvalues == MAT_COPY_VALUES) { 1790 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1791 } else { 1792 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1793 } 1794 } 1795 1796 c->roworiented = a->roworiented; 1797 c->nonew = a->nonew; 1798 1799 if (a->diag) { 1800 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 1801 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1802 for (i=0; i<mbs; i++) { 1803 c->diag[i] = a->diag[i]; 1804 } 1805 } else c->diag = 0; 1806 c->nz = a->nz; 1807 c->maxnz = a->maxnz; 1808 c->solve_work = 0; 1809 c->mult_work = 0; 1810 c->free_a = PETSC_TRUE; 1811 c->free_ij = PETSC_TRUE; 1812 *B = C; 1813 ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 1814 PetscFunctionReturn(0); 1815 } 1816 1817 #undef __FUNCT__ 1818 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1819 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer, MatType type,Mat *A) 1820 { 1821 Mat_SeqSBAIJ *a; 1822 Mat B; 1823 PetscErrorCode ierr; 1824 int fd; 1825 PetscMPIInt size; 1826 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 1827 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1828 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 1829 PetscInt *masked,nmask,tmp,bs2,ishift; 1830 PetscScalar *aa; 1831 MPI_Comm comm = ((PetscObject)viewer)->comm; 1832 1833 PetscFunctionBegin; 1834 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1835 bs2 = bs*bs; 1836 1837 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1838 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1839 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1840 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1841 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1842 M = header[1]; N = header[2]; nz = header[3]; 1843 1844 if (header[3] < 0) { 1845 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1846 } 1847 1848 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1849 1850 /* 1851 This code adds extra rows to make sure the number of rows is 1852 divisible by the blocksize 1853 */ 1854 mbs = M/bs; 1855 extra_rows = bs - M + bs*(mbs); 1856 if (extra_rows == bs) extra_rows = 0; 1857 else mbs++; 1858 if (extra_rows) { 1859 ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 1860 } 1861 1862 /* read in row lengths */ 1863 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1864 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1865 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1866 1867 /* read in column indices */ 1868 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1869 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1870 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1871 1872 /* loop over row lengths determining block row lengths */ 1873 ierr = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr); 1874 ierr = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1875 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 1876 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1877 masked = mask + mbs; 1878 rowcount = 0; nzcount = 0; 1879 for (i=0; i<mbs; i++) { 1880 nmask = 0; 1881 for (j=0; j<bs; j++) { 1882 kmax = rowlengths[rowcount]; 1883 for (k=0; k<kmax; k++) { 1884 tmp = jj[nzcount++]/bs; /* block col. index */ 1885 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1886 } 1887 rowcount++; 1888 } 1889 s_browlengths[i] += nmask; 1890 1891 /* zero out the mask elements we set */ 1892 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1893 } 1894 1895 /* create our matrix */ 1896 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 1897 ierr = MatSetSizes(B,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 1898 ierr = MatSetType(B,type);CHKERRQ(ierr); 1899 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,s_browlengths);CHKERRQ(ierr); 1900 a = (Mat_SeqSBAIJ*)B->data; 1901 1902 /* set matrix "i" values */ 1903 a->i[0] = 0; 1904 for (i=1; i<= mbs; i++) { 1905 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1906 a->ilen[i-1] = s_browlengths[i-1]; 1907 } 1908 a->nz = a->i[mbs]; 1909 1910 /* read in nonzero values */ 1911 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1912 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1913 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1914 1915 /* set "a" and "j" values into matrix */ 1916 nzcount = 0; jcount = 0; 1917 for (i=0; i<mbs; i++) { 1918 nzcountb = nzcount; 1919 nmask = 0; 1920 for (j=0; j<bs; j++) { 1921 kmax = rowlengths[i*bs+j]; 1922 for (k=0; k<kmax; k++) { 1923 tmp = jj[nzcount++]/bs; /* block col. index */ 1924 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1925 } 1926 } 1927 /* sort the masked values */ 1928 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1929 1930 /* set "j" values into matrix */ 1931 maskcount = 1; 1932 for (j=0; j<nmask; j++) { 1933 a->j[jcount++] = masked[j]; 1934 mask[masked[j]] = maskcount++; 1935 } 1936 1937 /* set "a" values into matrix */ 1938 ishift = bs2*a->i[i]; 1939 for (j=0; j<bs; j++) { 1940 kmax = rowlengths[i*bs+j]; 1941 for (k=0; k<kmax; k++) { 1942 tmp = jj[nzcountb]/bs ; /* block col. index */ 1943 if (tmp >= i){ 1944 block = mask[tmp] - 1; 1945 point = jj[nzcountb] - bs*tmp; 1946 idx = ishift + bs2*block + j + bs*point; 1947 a->a[idx] = aa[nzcountb]; 1948 } 1949 nzcountb++; 1950 } 1951 } 1952 /* zero out the mask elements we set */ 1953 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1954 } 1955 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1956 1957 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1958 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1959 ierr = PetscFree(aa);CHKERRQ(ierr); 1960 ierr = PetscFree(jj);CHKERRQ(ierr); 1961 ierr = PetscFree(mask);CHKERRQ(ierr); 1962 1963 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1964 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1965 ierr = MatView_Private(B);CHKERRQ(ierr); 1966 *A = B; 1967 PetscFunctionReturn(0); 1968 } 1969 1970 #undef __FUNCT__ 1971 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1972 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1973 { 1974 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1975 MatScalar *aa=a->a,*v,*v1; 1976 PetscScalar *x,*b,*t,sum,d; 1977 PetscErrorCode ierr; 1978 PetscInt m=a->mbs,bs=A->rmap.bs,*ai=a->i,*aj=a->j; 1979 PetscInt nz,nz1,*vj,*vj1,i; 1980 1981 PetscFunctionBegin; 1982 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 1983 1984 its = its*lits; 1985 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1986 1987 if (bs > 1) 1988 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1989 1990 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1991 if (xx != bb) { 1992 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1993 } else { 1994 b = x; 1995 } 1996 1997 if (!a->relax_work) { 1998 ierr = PetscMalloc(m*sizeof(PetscScalar),&a->relax_work);CHKERRQ(ierr); 1999 } 2000 t = a->relax_work; 2001 2002 if (flag & SOR_ZERO_INITIAL_GUESS) { 2003 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2004 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 2005 2006 for (i=0; i<m; i++){ 2007 d = *(aa + ai[i]); /* diag[i] */ 2008 v = aa + ai[i] + 1; 2009 vj = aj + ai[i] + 1; 2010 nz = ai[i+1] - ai[i] - 1; 2011 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2012 x[i] = omega*t[i]/d; 2013 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 2014 } 2015 } 2016 2017 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2018 if (!(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)){ 2019 t = b; 2020 } 2021 2022 for (i=m-1; i>=0; i--){ 2023 d = *(aa + ai[i]); 2024 v = aa + ai[i] + 1; 2025 vj = aj + ai[i] + 1; 2026 nz = ai[i+1] - ai[i] - 1; 2027 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2028 sum = t[i]; 2029 while (nz--) sum -= x[*vj++]*(*v++); 2030 x[i] = (1-omega)*x[i] + omega*sum/d; 2031 } 2032 t = a->relax_work; 2033 } 2034 its--; 2035 } 2036 2037 while (its--) { 2038 /* 2039 forward sweep: 2040 for i=0,...,m-1: 2041 sum[i] = (b[i] - U(i,:)x )/d[i]; 2042 x[i] = (1-omega)x[i] + omega*sum[i]; 2043 b = b - x[i]*U^T(i,:); 2044 2045 */ 2046 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2047 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 2048 2049 for (i=0; i<m; i++){ 2050 d = *(aa + ai[i]); /* diag[i] */ 2051 v = aa + ai[i] + 1; v1=v; 2052 vj = aj + ai[i] + 1; vj1=vj; 2053 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2054 sum = t[i]; 2055 ierr = PetscLogFlops(4*nz-2);CHKERRQ(ierr); 2056 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2057 x[i] = (1-omega)*x[i] + omega*sum/d; 2058 while (nz--) t[*vj++] -= x[i]*(*v++); 2059 } 2060 } 2061 2062 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2063 /* 2064 backward sweep: 2065 b = b - x[i]*U^T(i,:), i=0,...,n-2 2066 for i=m-1,...,0: 2067 sum[i] = (b[i] - U(i,:)x )/d[i]; 2068 x[i] = (1-omega)x[i] + omega*sum[i]; 2069 */ 2070 /* if there was a forward sweep done above then I thing the next two for loops are not needed */ 2071 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 2072 2073 for (i=0; i<m-1; i++){ /* update rhs */ 2074 v = aa + ai[i] + 1; 2075 vj = aj + ai[i] + 1; 2076 nz = ai[i+1] - ai[i] - 1; 2077 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2078 while (nz--) t[*vj++] -= x[i]*(*v++); 2079 } 2080 for (i=m-1; i>=0; i--){ 2081 d = *(aa + ai[i]); 2082 v = aa + ai[i] + 1; 2083 vj = aj + ai[i] + 1; 2084 nz = ai[i+1] - ai[i] - 1; 2085 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2086 sum = t[i]; 2087 while (nz--) sum -= x[*vj++]*(*v++); 2088 x[i] = (1-omega)*x[i] + omega*sum/d; 2089 } 2090 } 2091 } 2092 2093 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2094 if (bb != xx) { 2095 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2096 } 2097 PetscFunctionReturn(0); 2098 } 2099 2100 #undef __FUNCT__ 2101 #define __FUNCT__ "MatCreateSeqSBAIJWithArrays" 2102 /*@ 2103 MatCreateSeqSBAIJWithArrays - Creates an sequential SBAIJ matrix using matrix elements 2104 (upper triangular entries in CSR format) provided by the user. 2105 2106 Collective on MPI_Comm 2107 2108 Input Parameters: 2109 + comm - must be an MPI communicator of size 1 2110 . bs - size of block 2111 . m - number of rows 2112 . n - number of columns 2113 . i - row indices 2114 . j - column indices 2115 - a - matrix values 2116 2117 Output Parameter: 2118 . mat - the matrix 2119 2120 Level: intermediate 2121 2122 Notes: 2123 The i, j, and a arrays are not copied by this routine, the user must free these arrays 2124 once the matrix is destroyed 2125 2126 You cannot set new nonzero locations into this matrix, that will generate an error. 2127 2128 The i and j indices are 0 based 2129 2130 .seealso: MatCreate(), MatCreateMPISBAIJ(), MatCreateSeqSBAIJ() 2131 2132 @*/ 2133 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 2134 { 2135 PetscErrorCode ierr; 2136 PetscInt ii; 2137 Mat_SeqSBAIJ *sbaij; 2138 2139 PetscFunctionBegin; 2140 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 2141 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2142 2143 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2144 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 2145 ierr = MatSetType(*mat,MATSEQSBAIJ);CHKERRQ(ierr); 2146 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 2147 sbaij = (Mat_SeqSBAIJ*)(*mat)->data; 2148 ierr = PetscMalloc2(m,PetscInt,&sbaij->imax,m,PetscInt,&sbaij->ilen);CHKERRQ(ierr); 2149 2150 sbaij->i = i; 2151 sbaij->j = j; 2152 sbaij->a = a; 2153 sbaij->singlemalloc = PETSC_FALSE; 2154 sbaij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 2155 sbaij->free_a = PETSC_FALSE; 2156 sbaij->free_ij = PETSC_FALSE; 2157 2158 for (ii=0; ii<m; ii++) { 2159 sbaij->ilen[ii] = sbaij->imax[ii] = i[ii+1] - i[ii]; 2160 #if defined(PETSC_USE_DEBUG) 2161 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 2162 #endif 2163 } 2164 #if defined(PETSC_USE_DEBUG) 2165 for (ii=0; ii<sbaij->i[m]; ii++) { 2166 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 2167 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 2168 } 2169 #endif 2170 2171 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2172 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2173 PetscFunctionReturn(0); 2174 } 2175 2176 2177 2178 2179 2180