1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the SBAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/inline/spops.h" 9 #include "src/mat/impls/sbaij/seq/sbaij.h" 10 11 #define CHUNKSIZE 10 12 13 /* 14 Checks for missing diagonals 15 */ 16 #undef __FUNCT__ 17 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 18 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A) 19 { 20 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 21 PetscErrorCode ierr; 22 PetscInt *diag,*jj = a->j,i; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 for (i=0; i<a->mbs; i++) { 28 if (jj[diag[i]] != i) SETERRQ1(PETSC_ERR_ARG_CORRUPT,"Matrix is missing diagonal number %D",i); 29 } 30 PetscFunctionReturn(0); 31 } 32 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 35 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A) 36 { 37 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 38 PetscErrorCode ierr; 39 PetscInt i; 40 41 PetscFunctionBegin; 42 if (!a->diag) { 43 ierr = PetscMalloc(a->mbs*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 44 } 45 for (i=0; i<a->mbs; i++) a->diag[i] = a->i[i]; 46 PetscFunctionReturn(0); 47 } 48 49 #undef __FUNCT__ 50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 51 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 52 { 53 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 54 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs; 55 PetscErrorCode ierr; 56 57 PetscFunctionBegin; 58 *nn = n; 59 if (!ia) PetscFunctionReturn(0); 60 if (!blockcompressed) { 61 /* malloc & create the natural set of indices */ 62 ierr = PetscMalloc2((n+1)*bs,PetscInt,ia,nz*bs,PetscInt,ja);CHKERRQ(ierr); 63 for (i=0; i<n+1; i++) { 64 for (j=0; j<bs; j++) { 65 *ia[i*bs+j] = a->i[i]*bs+j+oshift; 66 } 67 } 68 for (i=0; i<nz; i++) { 69 for (j=0; j<bs; j++) { 70 *ja[i*bs+j] = a->j[i]*bs+j+oshift; 71 } 72 } 73 } else { /* blockcompressed */ 74 if (oshift == 1) { 75 /* temporarily add 1 to i and j indices */ 76 for (i=0; i<nz; i++) a->j[i]++; 77 for (i=0; i<n+1; i++) a->i[i]++; 78 } 79 *ia = a->i; *ja = a->j; 80 } 81 82 PetscFunctionReturn(0); 83 } 84 85 #undef __FUNCT__ 86 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 87 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 88 { 89 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 90 PetscInt i,n = a->mbs,nz = a->i[n]; 91 PetscErrorCode ierr; 92 93 PetscFunctionBegin; 94 if (!ia) PetscFunctionReturn(0); 95 96 if (!blockcompressed) { 97 ierr = PetscFree2(*ia,*ja);CHKERRQ(ierr); 98 } else if (oshift == 1) { /* blockcompressed */ 99 for (i=0; i<nz; i++) a->j[i]--; 100 for (i=0; i<n+1; i++) a->i[i]--; 101 } 102 103 PetscFunctionReturn(0); 104 } 105 106 #undef __FUNCT__ 107 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 108 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A) 109 { 110 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 111 PetscErrorCode ierr; 112 113 PetscFunctionBegin; 114 #if defined(PETSC_USE_LOG) 115 PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->rmap.N,a->nz); 116 #endif 117 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 118 if (a->row) {ierr = ISDestroy(a->row);CHKERRQ(ierr);} 119 if (a->col){ierr = ISDestroy(a->col);CHKERRQ(ierr);} 120 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 121 ierr = PetscFree(a->diag);CHKERRQ(ierr); 122 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 123 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 124 ierr = PetscFree(a->relax_work);CHKERRQ(ierr); 125 ierr = PetscFree(a->solves_work);CHKERRQ(ierr); 126 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 127 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 128 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 129 130 ierr = PetscFree(a->inew);CHKERRQ(ierr); 131 ierr = PetscFree(a);CHKERRQ(ierr); 132 133 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 134 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 135 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 136 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 137 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 138 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr); 139 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 140 PetscFunctionReturn(0); 141 } 142 143 #undef __FUNCT__ 144 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 145 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op) 146 { 147 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 148 PetscErrorCode ierr; 149 150 PetscFunctionBegin; 151 switch (op) { 152 case MAT_ROW_ORIENTED: 153 a->roworiented = PETSC_TRUE; 154 break; 155 case MAT_COLUMN_ORIENTED: 156 a->roworiented = PETSC_FALSE; 157 break; 158 case MAT_COLUMNS_SORTED: 159 a->sorted = PETSC_TRUE; 160 break; 161 case MAT_COLUMNS_UNSORTED: 162 a->sorted = PETSC_FALSE; 163 break; 164 case MAT_KEEP_ZEROED_ROWS: 165 a->keepzeroedrows = PETSC_TRUE; 166 break; 167 case MAT_NO_NEW_NONZERO_LOCATIONS: 168 a->nonew = 1; 169 break; 170 case MAT_NEW_NONZERO_LOCATION_ERR: 171 a->nonew = -1; 172 break; 173 case MAT_NEW_NONZERO_ALLOCATION_ERR: 174 a->nonew = -2; 175 break; 176 case MAT_YES_NEW_NONZERO_LOCATIONS: 177 a->nonew = 0; 178 break; 179 case MAT_ROWS_SORTED: 180 case MAT_ROWS_UNSORTED: 181 case MAT_YES_NEW_DIAGONALS: 182 case MAT_IGNORE_OFF_PROC_ENTRIES: 183 case MAT_USE_HASH_TABLE: 184 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 185 break; 186 case MAT_NO_NEW_DIAGONALS: 187 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 188 case MAT_NOT_SYMMETRIC: 189 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 190 case MAT_HERMITIAN: 191 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 192 case MAT_SYMMETRIC: 193 case MAT_STRUCTURALLY_SYMMETRIC: 194 case MAT_NOT_HERMITIAN: 195 case MAT_SYMMETRY_ETERNAL: 196 case MAT_NOT_SYMMETRY_ETERNAL: 197 ierr = PetscInfo1(A,"Option %s not relevent\n",MatOptions[op]);CHKERRQ(ierr); 198 break; 199 case MAT_IGNORE_LOWER_TRIANGULAR: 200 a->ignore_ltriangular = PETSC_TRUE; 201 break; 202 case MAT_ERROR_LOWER_TRIANGULAR: 203 a->ignore_ltriangular = PETSC_FALSE; 204 break; 205 case MAT_GETROW_UPPERTRIANGULAR: 206 a->getrow_utriangular = PETSC_TRUE; 207 break; 208 default: 209 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 210 } 211 PetscFunctionReturn(0); 212 } 213 214 #undef __FUNCT__ 215 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 216 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v) 217 { 218 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 219 PetscErrorCode ierr; 220 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2; 221 MatScalar *aa,*aa_i; 222 PetscScalar *v_i; 223 224 PetscFunctionBegin; 225 if (A && !a->getrow_utriangular) SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format. Getting the upper triangular part of row, run with -mat_getrow_uppertriangular, call MatSetOption(mat,MAT_GETROW_UPPERTRIANGULAR) or MatGetRowUpperTriangular()"); 226 /* Get the upper triangular part of the row */ 227 bs = A->rmap.bs; 228 ai = a->i; 229 aj = a->j; 230 aa = a->a; 231 bs2 = a->bs2; 232 233 if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row); 234 235 bn = row/bs; /* Block number */ 236 bp = row % bs; /* Block position */ 237 M = ai[bn+1] - ai[bn]; 238 *ncols = bs*M; 239 240 if (v) { 241 *v = 0; 242 if (*ncols) { 243 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 244 for (i=0; i<M; i++) { /* for each block in the block row */ 245 v_i = *v + i*bs; 246 aa_i = aa + bs2*(ai[bn] + i); 247 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 248 } 249 } 250 } 251 252 if (cols) { 253 *cols = 0; 254 if (*ncols) { 255 ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr); 256 for (i=0; i<M; i++) { /* for each block in the block row */ 257 cols_i = *cols + i*bs; 258 itmp = bs*aj[ai[bn] + i]; 259 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 260 } 261 } 262 } 263 264 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 265 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 266 #ifdef column_search 267 v_i = *v + M*bs; 268 cols_i = *cols + M*bs; 269 for (i=0; i<bn; i++){ /* for each block row */ 270 M = ai[i+1] - ai[i]; 271 for (j=0; j<M; j++){ 272 itmp = aj[ai[i] + j]; /* block column value */ 273 if (itmp == bn){ 274 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 275 for (k=0; k<bs; k++) { 276 *cols_i++ = i*bs+k; 277 *v_i++ = aa_i[k]; 278 } 279 *ncols += bs; 280 break; 281 } 282 } 283 } 284 #endif 285 PetscFunctionReturn(0); 286 } 287 288 #undef __FUNCT__ 289 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 290 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 291 { 292 PetscErrorCode ierr; 293 294 PetscFunctionBegin; 295 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 296 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 297 PetscFunctionReturn(0); 298 } 299 300 #undef __FUNCT__ 301 #define __FUNCT__ "MatGetRowUpperTriangular_SeqSBAIJ" 302 PetscErrorCode MatGetRowUpperTriangular_SeqSBAIJ(Mat A) 303 { 304 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 305 306 PetscFunctionBegin; 307 a->getrow_utriangular = PETSC_TRUE; 308 PetscFunctionReturn(0); 309 } 310 #undef __FUNCT__ 311 #define __FUNCT__ "MatRestoreRowUpperTriangular_SeqSBAIJ" 312 PetscErrorCode MatRestoreRowUpperTriangular_SeqSBAIJ(Mat A) 313 { 314 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 315 316 PetscFunctionBegin; 317 a->getrow_utriangular = PETSC_FALSE; 318 PetscFunctionReturn(0); 319 } 320 321 #undef __FUNCT__ 322 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 323 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B) 324 { 325 PetscErrorCode ierr; 326 PetscFunctionBegin; 327 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 328 PetscFunctionReturn(0); 329 } 330 331 #undef __FUNCT__ 332 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 333 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 334 { 335 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 336 PetscErrorCode ierr; 337 PetscInt i,j,bs = A->rmap.bs,k,l,bs2=a->bs2; 338 const char *name; 339 PetscViewerFormat format; 340 341 PetscFunctionBegin; 342 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 343 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 344 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 345 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 346 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 347 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 348 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 349 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 350 for (i=0; i<a->mbs; i++) { 351 for (j=0; j<bs; j++) { 352 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 353 for (k=a->i[i]; k<a->i[i+1]; k++) { 354 for (l=0; l<bs; l++) { 355 #if defined(PETSC_USE_COMPLEX) 356 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 357 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 358 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 359 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 360 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 361 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 362 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 363 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 364 } 365 #else 366 if (a->a[bs2*k + l*bs + j] != 0.0) { 367 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 368 } 369 #endif 370 } 371 } 372 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 373 } 374 } 375 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 376 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 377 PetscFunctionReturn(0); 378 } else { 379 if (A->factor && bs>1){ 380 ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: matrix is factored. MatView_SeqSBAIJ_ASCII() may not display complete or logically correct entries!\n");CHKERRQ(ierr); 381 } 382 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 383 for (i=0; i<a->mbs; i++) { 384 for (j=0; j<bs; j++) { 385 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 386 for (k=a->i[i]; k<a->i[i+1]; k++) { 387 for (l=0; l<bs; l++) { 388 #if defined(PETSC_USE_COMPLEX) 389 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 390 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 391 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 392 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 393 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 394 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 395 } else { 396 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 397 } 398 #else 399 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 400 #endif 401 } 402 } 403 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 404 } 405 } 406 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 407 } 408 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 409 PetscFunctionReturn(0); 410 } 411 412 #undef __FUNCT__ 413 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 414 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 415 { 416 Mat A = (Mat) Aa; 417 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 418 PetscErrorCode ierr; 419 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2; 420 PetscMPIInt rank; 421 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 422 MatScalar *aa; 423 MPI_Comm comm; 424 PetscViewer viewer; 425 426 PetscFunctionBegin; 427 /* 428 This is nasty. If this is called from an originally parallel matrix 429 then all processes call this,but only the first has the matrix so the 430 rest should return immediately. 431 */ 432 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 433 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 434 if (rank) PetscFunctionReturn(0); 435 436 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 437 438 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 439 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 440 441 /* loop over matrix elements drawing boxes */ 442 color = PETSC_DRAW_BLUE; 443 for (i=0,row=0; i<mbs; i++,row+=bs) { 444 for (j=a->i[i]; j<a->i[i+1]; j++) { 445 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 446 x_l = a->j[j]*bs; x_r = x_l + 1.0; 447 aa = a->a + j*bs2; 448 for (k=0; k<bs; k++) { 449 for (l=0; l<bs; l++) { 450 if (PetscRealPart(*aa++) >= 0.) continue; 451 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 452 } 453 } 454 } 455 } 456 color = PETSC_DRAW_CYAN; 457 for (i=0,row=0; i<mbs; i++,row+=bs) { 458 for (j=a->i[i]; j<a->i[i+1]; j++) { 459 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 460 x_l = a->j[j]*bs; x_r = x_l + 1.0; 461 aa = a->a + j*bs2; 462 for (k=0; k<bs; k++) { 463 for (l=0; l<bs; l++) { 464 if (PetscRealPart(*aa++) != 0.) continue; 465 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 466 } 467 } 468 } 469 } 470 471 color = PETSC_DRAW_RED; 472 for (i=0,row=0; i<mbs; i++,row+=bs) { 473 for (j=a->i[i]; j<a->i[i+1]; j++) { 474 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 475 x_l = a->j[j]*bs; x_r = x_l + 1.0; 476 aa = a->a + j*bs2; 477 for (k=0; k<bs; k++) { 478 for (l=0; l<bs; l++) { 479 if (PetscRealPart(*aa++) <= 0.) continue; 480 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 481 } 482 } 483 } 484 } 485 PetscFunctionReturn(0); 486 } 487 488 #undef __FUNCT__ 489 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 490 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 491 { 492 PetscErrorCode ierr; 493 PetscReal xl,yl,xr,yr,w,h; 494 PetscDraw draw; 495 PetscTruth isnull; 496 497 PetscFunctionBegin; 498 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 499 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 500 501 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 502 xr = A->rmap.N; yr = A->rmap.N; h = yr/10.0; w = xr/10.0; 503 xr += w; yr += h; xl = -w; yl = -h; 504 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 505 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 506 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 507 PetscFunctionReturn(0); 508 } 509 510 #undef __FUNCT__ 511 #define __FUNCT__ "MatView_SeqSBAIJ" 512 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 513 { 514 PetscErrorCode ierr; 515 PetscTruth iascii,isdraw; 516 517 PetscFunctionBegin; 518 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 519 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 520 if (iascii){ 521 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 522 } else if (isdraw) { 523 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 524 } else { 525 Mat B; 526 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 527 ierr = MatView(B,viewer);CHKERRQ(ierr); 528 ierr = MatDestroy(B);CHKERRQ(ierr); 529 } 530 PetscFunctionReturn(0); 531 } 532 533 534 #undef __FUNCT__ 535 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 536 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 537 { 538 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 539 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 540 PetscInt *ai = a->i,*ailen = a->ilen; 541 PetscInt brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2; 542 MatScalar *ap,*aa = a->a,zero = 0.0; 543 544 PetscFunctionBegin; 545 for (k=0; k<m; k++) { /* loop over rows */ 546 row = im[k]; brow = row/bs; 547 if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row); 548 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 549 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 550 nrow = ailen[brow]; 551 for (l=0; l<n; l++) { /* loop over columns */ 552 if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]); 553 if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1); 554 col = in[l] ; 555 bcol = col/bs; 556 cidx = col%bs; 557 ridx = row%bs; 558 high = nrow; 559 low = 0; /* assume unsorted */ 560 while (high-low > 5) { 561 t = (low+high)/2; 562 if (rp[t] > bcol) high = t; 563 else low = t; 564 } 565 for (i=low; i<high; i++) { 566 if (rp[i] > bcol) break; 567 if (rp[i] == bcol) { 568 *v++ = ap[bs2*i+bs*cidx+ridx]; 569 goto finished; 570 } 571 } 572 *v++ = zero; 573 finished:; 574 } 575 } 576 PetscFunctionReturn(0); 577 } 578 579 580 #undef __FUNCT__ 581 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 582 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 583 { 584 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 585 PetscErrorCode ierr; 586 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 587 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 588 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval; 589 PetscTruth roworiented=a->roworiented; 590 const MatScalar *value = v; 591 MatScalar *ap,*aa = a->a,*bap; 592 593 PetscFunctionBegin; 594 if (roworiented) { 595 stepval = (n-1)*bs; 596 } else { 597 stepval = (m-1)*bs; 598 } 599 for (k=0; k<m; k++) { /* loop over added rows */ 600 row = im[k]; 601 if (row < 0) continue; 602 #if defined(PETSC_USE_DEBUG) 603 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 604 #endif 605 rp = aj + ai[row]; 606 ap = aa + bs2*ai[row]; 607 rmax = imax[row]; 608 nrow = ailen[row]; 609 low = 0; 610 high = nrow; 611 for (l=0; l<n; l++) { /* loop over added columns */ 612 if (in[l] < 0) continue; 613 col = in[l]; 614 #if defined(PETSC_USE_DEBUG) 615 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1); 616 #endif 617 if (col < row) continue; /* ignore lower triangular block */ 618 if (roworiented) { 619 value = v + k*(stepval+bs)*bs + l*bs; 620 } else { 621 value = v + l*(stepval+bs)*bs + k*bs; 622 } 623 if (col <= lastcol) low = 0; else high = nrow; 624 lastcol = col; 625 while (high-low > 7) { 626 t = (low+high)/2; 627 if (rp[t] > col) high = t; 628 else low = t; 629 } 630 for (i=low; i<high; i++) { 631 if (rp[i] > col) break; 632 if (rp[i] == col) { 633 bap = ap + bs2*i; 634 if (roworiented) { 635 if (is == ADD_VALUES) { 636 for (ii=0; ii<bs; ii++,value+=stepval) { 637 for (jj=ii; jj<bs2; jj+=bs) { 638 bap[jj] += *value++; 639 } 640 } 641 } else { 642 for (ii=0; ii<bs; ii++,value+=stepval) { 643 for (jj=ii; jj<bs2; jj+=bs) { 644 bap[jj] = *value++; 645 } 646 } 647 } 648 } else { 649 if (is == ADD_VALUES) { 650 for (ii=0; ii<bs; ii++,value+=stepval) { 651 for (jj=0; jj<bs; jj++) { 652 *bap++ += *value++; 653 } 654 } 655 } else { 656 for (ii=0; ii<bs; ii++,value+=stepval) { 657 for (jj=0; jj<bs; jj++) { 658 *bap++ = *value++; 659 } 660 } 661 } 662 } 663 goto noinsert2; 664 } 665 } 666 if (nonew == 1) goto noinsert2; 667 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 668 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 669 N = nrow++ - 1; high++; 670 /* shift up all the later entries in this row */ 671 for (ii=N; ii>=i; ii--) { 672 rp[ii+1] = rp[ii]; 673 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 674 } 675 if (N >= i) { 676 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 677 } 678 rp[i] = col; 679 bap = ap + bs2*i; 680 if (roworiented) { 681 for (ii=0; ii<bs; ii++,value+=stepval) { 682 for (jj=ii; jj<bs2; jj+=bs) { 683 bap[jj] = *value++; 684 } 685 } 686 } else { 687 for (ii=0; ii<bs; ii++,value+=stepval) { 688 for (jj=0; jj<bs; jj++) { 689 *bap++ = *value++; 690 } 691 } 692 } 693 noinsert2:; 694 low = i; 695 } 696 ailen[row] = nrow; 697 } 698 PetscFunctionReturn(0); 699 } 700 701 #undef __FUNCT__ 702 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 703 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 704 { 705 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 706 PetscErrorCode ierr; 707 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 708 PetscInt m = A->rmap.N,*ip,N,*ailen = a->ilen; 709 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 710 MatScalar *aa = a->a,*ap; 711 712 PetscFunctionBegin; 713 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 714 715 if (m) rmax = ailen[0]; 716 for (i=1; i<mbs; i++) { 717 /* move each row back by the amount of empty slots (fshift) before it*/ 718 fshift += imax[i-1] - ailen[i-1]; 719 rmax = PetscMax(rmax,ailen[i]); 720 if (fshift) { 721 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 722 N = ailen[i]; 723 for (j=0; j<N; j++) { 724 ip[j-fshift] = ip[j]; 725 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 726 } 727 } 728 ai[i] = ai[i-1] + ailen[i-1]; 729 } 730 if (mbs) { 731 fshift += imax[mbs-1] - ailen[mbs-1]; 732 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 733 } 734 /* reset ilen and imax for each row */ 735 for (i=0; i<mbs; i++) { 736 ailen[i] = imax[i] = ai[i+1] - ai[i]; 737 } 738 a->nz = ai[mbs]; 739 740 /* diagonals may have moved, reset it */ 741 if (a->diag) { 742 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 743 } 744 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->rmap.N,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 745 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 746 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 747 a->reallocs = 0; 748 A->info.nz_unneeded = (PetscReal)fshift*bs2; 749 PetscFunctionReturn(0); 750 } 751 752 /* 753 This function returns an array of flags which indicate the locations of contiguous 754 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 755 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 756 Assume: sizes should be long enough to hold all the values. 757 */ 758 #undef __FUNCT__ 759 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 760 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 761 { 762 PetscInt i,j,k,row; 763 PetscTruth flg; 764 765 PetscFunctionBegin; 766 for (i=0,j=0; i<n; j++) { 767 row = idx[i]; 768 if (row%bs!=0) { /* Not the begining of a block */ 769 sizes[j] = 1; 770 i++; 771 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 772 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 773 i++; 774 } else { /* Begining of the block, so check if the complete block exists */ 775 flg = PETSC_TRUE; 776 for (k=1; k<bs; k++) { 777 if (row+k != idx[i+k]) { /* break in the block */ 778 flg = PETSC_FALSE; 779 break; 780 } 781 } 782 if (flg) { /* No break in the bs */ 783 sizes[j] = bs; 784 i+= bs; 785 } else { 786 sizes[j] = 1; 787 i++; 788 } 789 } 790 } 791 *bs_max = j; 792 PetscFunctionReturn(0); 793 } 794 795 796 /* Only add/insert a(i,j) with i<=j (blocks). 797 Any a(i,j) with i>j input by user is ingored. 798 */ 799 800 #undef __FUNCT__ 801 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 802 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 803 { 804 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 805 PetscErrorCode ierr; 806 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 807 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 808 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol; 809 PetscInt ridx,cidx,bs2=a->bs2; 810 MatScalar *ap,value,*aa=a->a,*bap; 811 812 PetscFunctionBegin; 813 for (k=0; k<m; k++) { /* loop over added rows */ 814 row = im[k]; /* row number */ 815 brow = row/bs; /* block row number */ 816 if (row < 0) continue; 817 #if defined(PETSC_USE_DEBUG) 818 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 819 #endif 820 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 821 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 822 rmax = imax[brow]; /* maximum space allocated for this row */ 823 nrow = ailen[brow]; /* actual length of this row */ 824 low = 0; 825 826 for (l=0; l<n; l++) { /* loop over added columns */ 827 if (in[l] < 0) continue; 828 #if defined(PETSC_USE_DEBUG) 829 if (in[l] >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->rmap.N-1); 830 #endif 831 col = in[l]; 832 bcol = col/bs; /* block col number */ 833 834 if (brow > bcol) { 835 if (a->ignore_ltriangular){ 836 continue; /* ignore lower triangular values */ 837 } else { 838 SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR)"); 839 } 840 } 841 842 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 843 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 844 /* element value a(k,l) */ 845 if (roworiented) { 846 value = v[l + k*n]; 847 } else { 848 value = v[k + l*m]; 849 } 850 851 /* move pointer bap to a(k,l) quickly and add/insert value */ 852 if (col <= lastcol) low = 0; high = nrow; 853 lastcol = col; 854 while (high-low > 7) { 855 t = (low+high)/2; 856 if (rp[t] > bcol) high = t; 857 else low = t; 858 } 859 for (i=low; i<high; i++) { 860 if (rp[i] > bcol) break; 861 if (rp[i] == bcol) { 862 bap = ap + bs2*i + bs*cidx + ridx; 863 if (is == ADD_VALUES) *bap += value; 864 else *bap = value; 865 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 866 if (brow == bcol && ridx < cidx){ 867 bap = ap + bs2*i + bs*ridx + cidx; 868 if (is == ADD_VALUES) *bap += value; 869 else *bap = value; 870 } 871 goto noinsert1; 872 } 873 } 874 875 if (nonew == 1) goto noinsert1; 876 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 877 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 878 879 N = nrow++ - 1; high++; 880 /* shift up all the later entries in this row */ 881 for (ii=N; ii>=i; ii--) { 882 rp[ii+1] = rp[ii]; 883 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 884 } 885 if (N>=i) { 886 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 887 } 888 rp[i] = bcol; 889 ap[bs2*i + bs*cidx + ridx] = value; 890 noinsert1:; 891 low = i; 892 } 893 } /* end of loop over added columns */ 894 ailen[brow] = nrow; 895 } /* end of loop over added rows */ 896 PetscFunctionReturn(0); 897 } 898 899 #undef __FUNCT__ 900 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 901 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 902 { 903 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 904 Mat outA; 905 PetscErrorCode ierr; 906 PetscTruth row_identity; 907 908 PetscFunctionBegin; 909 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels=0 is supported for in-place icc"); 910 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 911 if (!row_identity) SETERRQ(PETSC_ERR_SUP,"Matrix reordering is not supported"); 912 if (inA->rmap.bs != 1) SETERRQ1(PETSC_ERR_SUP,"Matrix block size %D is not supported",inA->rmap.bs); /* Need to replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR()! */ 913 914 outA = inA; 915 inA->factor = FACTOR_CHOLESKY; 916 917 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 918 /* 919 Blocksize < 8 have a special faster factorization/solver 920 for ICC(0) factorization with natural ordering 921 */ 922 switch (inA->rmap.bs){ /* Note: row_identity = PETSC_TRUE! */ 923 case 1: 924 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 925 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 926 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 927 inA->ops->solves = MatSolves_SeqSBAIJ_1; 928 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 929 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 930 ierr = PetscInfo(inA,"Using special in-place natural ordering solvetrans BS=1\n");CHKERRQ(ierr); 931 break; 932 case 2: 933 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 934 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 935 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 936 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering; 937 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering; 938 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=2\n");CHKERRQ(ierr); 939 break; 940 case 3: 941 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 942 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 943 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 944 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_3_NaturalOrdering; 945 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering; 946 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=3\n");CHKERRQ(ierr); 947 break; 948 case 4: 949 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 950 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 951 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 952 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_4_NaturalOrdering; 953 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering; 954 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=4\n");CHKERRQ(ierr); 955 break; 956 case 5: 957 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 958 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 959 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 960 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_5_NaturalOrdering; 961 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering; 962 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=5\n");CHKERRQ(ierr); 963 break; 964 case 6: 965 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 966 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 967 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 968 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_6_NaturalOrdering; 969 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering; 970 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=6\n");CHKERRQ(ierr); 971 break; 972 case 7: 973 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 974 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 975 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 976 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_7_NaturalOrdering; 977 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering; 978 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=7\n");CHKERRQ(ierr); 979 break; 980 default: 981 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 982 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering; 983 inA->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 984 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering; 985 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering; 986 break; 987 } 988 989 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 990 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 991 a->row = row; 992 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 993 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 994 a->col = row; 995 996 /* Create the invert permutation so that it can be used in MatCholeskyFactorNumeric() */ 997 if (a->icol) {ierr = ISInvertPermutation(row,PETSC_DECIDE, &a->icol);CHKERRQ(ierr);} 998 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 999 1000 if (!a->solve_work) { 1001 ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1002 ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1003 } 1004 1005 ierr = MatCholeskyFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1006 PetscFunctionReturn(0); 1007 } 1008 1009 EXTERN_C_BEGIN 1010 #undef __FUNCT__ 1011 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1012 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices) 1013 { 1014 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1015 PetscInt i,nz,n; 1016 1017 PetscFunctionBegin; 1018 nz = baij->maxnz; 1019 n = mat->cmap.n; 1020 for (i=0; i<nz; i++) { 1021 baij->j[i] = indices[i]; 1022 } 1023 baij->nz = nz; 1024 for (i=0; i<n; i++) { 1025 baij->ilen[i] = baij->imax[i]; 1026 } 1027 PetscFunctionReturn(0); 1028 } 1029 EXTERN_C_END 1030 1031 #undef __FUNCT__ 1032 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1033 /*@ 1034 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1035 in the matrix. 1036 1037 Input Parameters: 1038 + mat - the SeqSBAIJ matrix 1039 - indices - the column indices 1040 1041 Level: advanced 1042 1043 Notes: 1044 This can be called if you have precomputed the nonzero structure of the 1045 matrix and want to provide it to the matrix object to improve the performance 1046 of the MatSetValues() operation. 1047 1048 You MUST have set the correct numbers of nonzeros per row in the call to 1049 MatCreateSeqSBAIJ(), and the columns indices MUST be sorted. 1050 1051 MUST be called before any calls to MatSetValues() 1052 1053 .seealso: MatCreateSeqSBAIJ 1054 @*/ 1055 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1056 { 1057 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1058 1059 PetscFunctionBegin; 1060 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1061 PetscValidPointer(indices,2); 1062 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1063 if (f) { 1064 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1065 } else { 1066 SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices"); 1067 } 1068 PetscFunctionReturn(0); 1069 } 1070 1071 #undef __FUNCT__ 1072 #define __FUNCT__ "MatCopy_SeqSBAIJ" 1073 PetscErrorCode MatCopy_SeqSBAIJ(Mat A,Mat B,MatStructure str) 1074 { 1075 PetscErrorCode ierr; 1076 1077 PetscFunctionBegin; 1078 /* If the two matrices have the same copy implementation, use fast copy. */ 1079 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1080 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1081 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1082 1083 if (a->i[A->rmap.N] != b->i[B->rmap.N]) { 1084 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1085 } 1086 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr); 1087 } else { 1088 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1089 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1090 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1091 } 1092 PetscFunctionReturn(0); 1093 } 1094 1095 #undef __FUNCT__ 1096 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1097 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A) 1098 { 1099 PetscErrorCode ierr; 1100 1101 PetscFunctionBegin; 1102 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 1103 PetscFunctionReturn(0); 1104 } 1105 1106 #undef __FUNCT__ 1107 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1108 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1109 { 1110 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1111 PetscFunctionBegin; 1112 *array = a->a; 1113 PetscFunctionReturn(0); 1114 } 1115 1116 #undef __FUNCT__ 1117 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1118 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1119 { 1120 PetscFunctionBegin; 1121 PetscFunctionReturn(0); 1122 } 1123 1124 #include "petscblaslapack.h" 1125 #undef __FUNCT__ 1126 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1127 PetscErrorCode MatAXPY_SeqSBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1128 { 1129 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1130 PetscErrorCode ierr; 1131 PetscInt i,bs=Y->rmap.bs,bs2,j; 1132 PetscBLASInt bnz = (PetscBLASInt)x->nz,one = 1; 1133 1134 PetscFunctionBegin; 1135 if (str == SAME_NONZERO_PATTERN) { 1136 PetscScalar alpha = a; 1137 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1138 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1139 if (y->xtoy && y->XtoY != X) { 1140 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1141 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1142 } 1143 if (!y->xtoy) { /* get xtoy */ 1144 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1145 y->XtoY = X; 1146 } 1147 bs2 = bs*bs; 1148 for (i=0; i<x->nz; i++) { 1149 j = 0; 1150 while (j < bs2){ 1151 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 1152 j++; 1153 } 1154 } 1155 ierr = PetscInfo3(0,"ratio of nnz_s(X)/nnz_s(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 1156 } else { 1157 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1158 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1159 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1160 } 1161 PetscFunctionReturn(0); 1162 } 1163 1164 #undef __FUNCT__ 1165 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1166 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1167 { 1168 PetscFunctionBegin; 1169 *flg = PETSC_TRUE; 1170 PetscFunctionReturn(0); 1171 } 1172 1173 #undef __FUNCT__ 1174 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1175 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1176 { 1177 PetscFunctionBegin; 1178 *flg = PETSC_TRUE; 1179 PetscFunctionReturn(0); 1180 } 1181 1182 #undef __FUNCT__ 1183 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1184 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg) 1185 { 1186 PetscFunctionBegin; 1187 *flg = PETSC_FALSE; 1188 PetscFunctionReturn(0); 1189 } 1190 1191 #undef __FUNCT__ 1192 #define __FUNCT__ "MatRealPart_SeqSBAIJ" 1193 PetscErrorCode MatRealPart_SeqSBAIJ(Mat A) 1194 { 1195 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1196 PetscInt i,nz = a->bs2*a->i[a->mbs]; 1197 PetscScalar *aa = a->a; 1198 1199 PetscFunctionBegin; 1200 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 1201 PetscFunctionReturn(0); 1202 } 1203 1204 #undef __FUNCT__ 1205 #define __FUNCT__ "MatImaginaryPart_SeqSBAIJ" 1206 PetscErrorCode MatImaginaryPart_SeqSBAIJ(Mat A) 1207 { 1208 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1209 PetscInt i,nz = a->bs2*a->i[a->mbs]; 1210 PetscScalar *aa = a->a; 1211 1212 PetscFunctionBegin; 1213 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 1214 PetscFunctionReturn(0); 1215 } 1216 1217 /* -------------------------------------------------------------------*/ 1218 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1219 MatGetRow_SeqSBAIJ, 1220 MatRestoreRow_SeqSBAIJ, 1221 MatMult_SeqSBAIJ_N, 1222 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1223 MatMult_SeqSBAIJ_N, /* transpose versions are same as non-transpose versions */ 1224 MatMultAdd_SeqSBAIJ_N, 1225 MatSolve_SeqSBAIJ_N, 1226 0, 1227 0, 1228 /*10*/ 0, 1229 0, 1230 MatCholeskyFactor_SeqSBAIJ, 1231 MatRelax_SeqSBAIJ, 1232 MatTranspose_SeqSBAIJ, 1233 /*15*/ MatGetInfo_SeqSBAIJ, 1234 MatEqual_SeqSBAIJ, 1235 MatGetDiagonal_SeqSBAIJ, 1236 MatDiagonalScale_SeqSBAIJ, 1237 MatNorm_SeqSBAIJ, 1238 /*20*/ 0, 1239 MatAssemblyEnd_SeqSBAIJ, 1240 0, 1241 MatSetOption_SeqSBAIJ, 1242 MatZeroEntries_SeqSBAIJ, 1243 /*25*/ 0, 1244 0, 1245 0, 1246 MatCholeskyFactorSymbolic_SeqSBAIJ, 1247 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1248 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1249 0, 1250 MatICCFactorSymbolic_SeqSBAIJ, 1251 MatGetArray_SeqSBAIJ, 1252 MatRestoreArray_SeqSBAIJ, 1253 /*35*/ MatDuplicate_SeqSBAIJ, 1254 MatForwardSolve_SeqSBAIJ_N, 1255 MatBackwardSolve_SeqSBAIJ_N, 1256 0, 1257 MatICCFactor_SeqSBAIJ, 1258 /*40*/ MatAXPY_SeqSBAIJ, 1259 MatGetSubMatrices_SeqSBAIJ, 1260 MatIncreaseOverlap_SeqSBAIJ, 1261 MatGetValues_SeqSBAIJ, 1262 MatCopy_SeqSBAIJ, 1263 /*45*/ 0, 1264 MatScale_SeqSBAIJ, 1265 0, 1266 0, 1267 0, 1268 /*50*/ 0, 1269 MatGetRowIJ_SeqSBAIJ, 1270 MatRestoreRowIJ_SeqSBAIJ, 1271 0, 1272 0, 1273 /*55*/ 0, 1274 0, 1275 0, 1276 0, 1277 MatSetValuesBlocked_SeqSBAIJ, 1278 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1279 0, 1280 0, 1281 0, 1282 0, 1283 /*65*/ 0, 1284 0, 1285 0, 1286 0, 1287 0, 1288 /*70*/ MatGetRowMaxAbs_SeqSBAIJ, 1289 0, 1290 0, 1291 0, 1292 0, 1293 /*75*/ 0, 1294 0, 1295 0, 1296 0, 1297 0, 1298 /*80*/ 0, 1299 0, 1300 0, 1301 #if !defined(PETSC_USE_COMPLEX) 1302 MatGetInertia_SeqSBAIJ, 1303 #else 1304 0, 1305 #endif 1306 MatLoad_SeqSBAIJ, 1307 /*85*/ MatIsSymmetric_SeqSBAIJ, 1308 MatIsHermitian_SeqSBAIJ, 1309 MatIsStructurallySymmetric_SeqSBAIJ, 1310 0, 1311 0, 1312 /*90*/ 0, 1313 0, 1314 0, 1315 0, 1316 0, 1317 /*95*/ 0, 1318 0, 1319 0, 1320 0, 1321 0, 1322 /*100*/0, 1323 0, 1324 0, 1325 0, 1326 0, 1327 /*105*/0, 1328 MatRealPart_SeqSBAIJ, 1329 MatImaginaryPart_SeqSBAIJ, 1330 MatGetRowUpperTriangular_SeqSBAIJ, 1331 MatRestoreRowUpperTriangular_SeqSBAIJ 1332 }; 1333 1334 EXTERN_C_BEGIN 1335 #undef __FUNCT__ 1336 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1337 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqSBAIJ(Mat mat) 1338 { 1339 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1340 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 1341 PetscErrorCode ierr; 1342 1343 PetscFunctionBegin; 1344 if (aij->nonew != 1) { 1345 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1346 } 1347 1348 /* allocate space for values if not already there */ 1349 if (!aij->saved_values) { 1350 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1351 } 1352 1353 /* copy values over */ 1354 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1355 PetscFunctionReturn(0); 1356 } 1357 EXTERN_C_END 1358 1359 EXTERN_C_BEGIN 1360 #undef __FUNCT__ 1361 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1362 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqSBAIJ(Mat mat) 1363 { 1364 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1365 PetscErrorCode ierr; 1366 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 1367 1368 PetscFunctionBegin; 1369 if (aij->nonew != 1) { 1370 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1371 } 1372 if (!aij->saved_values) { 1373 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1374 } 1375 1376 /* copy values over */ 1377 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1378 PetscFunctionReturn(0); 1379 } 1380 EXTERN_C_END 1381 1382 EXTERN_C_BEGIN 1383 #undef __FUNCT__ 1384 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1385 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 1386 { 1387 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1388 PetscErrorCode ierr; 1389 PetscInt i,mbs,bs2; 1390 PetscTruth skipallocation = PETSC_FALSE,flg; 1391 1392 PetscFunctionBegin; 1393 B->preallocated = PETSC_TRUE; 1394 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1395 B->rmap.bs = B->cmap.bs = bs; 1396 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 1397 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 1398 1399 mbs = B->rmap.N/bs; 1400 bs2 = bs*bs; 1401 1402 if (mbs*bs != B->rmap.N) { 1403 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1404 } 1405 1406 if (nz == MAT_SKIP_ALLOCATION) { 1407 skipallocation = PETSC_TRUE; 1408 nz = 0; 1409 } 1410 1411 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1412 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 1413 if (nnz) { 1414 for (i=0; i<mbs; i++) { 1415 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 1416 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs); 1417 } 1418 } 1419 1420 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1421 if (!flg) { 1422 switch (bs) { 1423 case 1: 1424 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1425 B->ops->solve = MatSolve_SeqSBAIJ_1; 1426 B->ops->solves = MatSolves_SeqSBAIJ_1; 1427 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1428 B->ops->mult = MatMult_SeqSBAIJ_1; 1429 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1430 B->ops->multtranspose = MatMult_SeqSBAIJ_1; 1431 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_1; 1432 break; 1433 case 2: 1434 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1435 B->ops->solve = MatSolve_SeqSBAIJ_2; 1436 B->ops->solvetranspose = MatSolve_SeqSBAIJ_2; 1437 B->ops->mult = MatMult_SeqSBAIJ_2; 1438 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1439 B->ops->multtranspose = MatMult_SeqSBAIJ_2; 1440 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_2; 1441 break; 1442 case 3: 1443 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1444 B->ops->solve = MatSolve_SeqSBAIJ_3; 1445 B->ops->solvetranspose = MatSolve_SeqSBAIJ_3; 1446 B->ops->mult = MatMult_SeqSBAIJ_3; 1447 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1448 B->ops->multtranspose = MatMult_SeqSBAIJ_3; 1449 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_3; 1450 break; 1451 case 4: 1452 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1453 B->ops->solve = MatSolve_SeqSBAIJ_4; 1454 B->ops->solvetranspose = MatSolve_SeqSBAIJ_4; 1455 B->ops->mult = MatMult_SeqSBAIJ_4; 1456 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1457 B->ops->multtranspose = MatMult_SeqSBAIJ_4; 1458 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_4; 1459 break; 1460 case 5: 1461 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1462 B->ops->solve = MatSolve_SeqSBAIJ_5; 1463 B->ops->solvetranspose = MatSolve_SeqSBAIJ_5; 1464 B->ops->mult = MatMult_SeqSBAIJ_5; 1465 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1466 B->ops->multtranspose = MatMult_SeqSBAIJ_5; 1467 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_5; 1468 break; 1469 case 6: 1470 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1471 B->ops->solve = MatSolve_SeqSBAIJ_6; 1472 B->ops->solvetranspose = MatSolve_SeqSBAIJ_6; 1473 B->ops->mult = MatMult_SeqSBAIJ_6; 1474 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1475 B->ops->multtranspose = MatMult_SeqSBAIJ_6; 1476 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_6; 1477 break; 1478 case 7: 1479 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1480 B->ops->solve = MatSolve_SeqSBAIJ_7; 1481 B->ops->solvetranspose = MatSolve_SeqSBAIJ_7; 1482 B->ops->mult = MatMult_SeqSBAIJ_7; 1483 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1484 B->ops->multtranspose = MatMult_SeqSBAIJ_7; 1485 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_7; 1486 break; 1487 } 1488 } 1489 1490 b->mbs = mbs; 1491 b->nbs = mbs; 1492 if (!skipallocation) { 1493 /* b->ilen will count nonzeros in each block row so far. */ 1494 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 1495 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1496 if (!nnz) { 1497 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1498 else if (nz <= 0) nz = 1; 1499 for (i=0; i<mbs; i++) { 1500 b->imax[i] = nz; 1501 } 1502 nz = nz*mbs; /* total nz */ 1503 } else { 1504 nz = 0; 1505 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1506 } 1507 /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1508 1509 /* allocate the matrix space */ 1510 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr); 1511 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1512 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1513 b->singlemalloc = PETSC_TRUE; 1514 1515 /* pointer to beginning of each row */ 1516 b->i[0] = 0; 1517 for (i=1; i<mbs+1; i++) { 1518 b->i[i] = b->i[i-1] + b->imax[i-1]; 1519 } 1520 b->free_a = PETSC_TRUE; 1521 b->free_ij = PETSC_TRUE; 1522 } else { 1523 b->free_a = PETSC_FALSE; 1524 b->free_ij = PETSC_FALSE; 1525 } 1526 1527 B->rmap.bs = bs; 1528 b->bs2 = bs2; 1529 b->nz = 0; 1530 b->maxnz = nz*bs2; 1531 1532 b->inew = 0; 1533 b->jnew = 0; 1534 b->anew = 0; 1535 b->a2anew = 0; 1536 b->permute = PETSC_FALSE; 1537 PetscFunctionReturn(0); 1538 } 1539 EXTERN_C_END 1540 1541 EXTERN_C_BEGIN 1542 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 1543 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqBAIJ(Mat, MatType,MatReuse,Mat*); 1544 EXTERN_C_END 1545 1546 /*MC 1547 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1548 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1549 1550 Options Database Keys: 1551 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1552 1553 Level: beginner 1554 1555 .seealso: MatCreateSeqSBAIJ 1556 M*/ 1557 1558 EXTERN_C_BEGIN 1559 #undef __FUNCT__ 1560 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1561 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqSBAIJ(Mat B) 1562 { 1563 Mat_SeqSBAIJ *b; 1564 PetscErrorCode ierr; 1565 PetscMPIInt size; 1566 PetscTruth flg; 1567 1568 PetscFunctionBegin; 1569 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1570 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1571 1572 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1573 B->data = (void*)b; 1574 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1575 B->ops->destroy = MatDestroy_SeqSBAIJ; 1576 B->ops->view = MatView_SeqSBAIJ; 1577 B->factor = 0; 1578 B->mapping = 0; 1579 b->row = 0; 1580 b->icol = 0; 1581 b->reallocs = 0; 1582 b->saved_values = 0; 1583 1584 1585 b->sorted = PETSC_FALSE; 1586 b->roworiented = PETSC_TRUE; 1587 b->nonew = 0; 1588 b->diag = 0; 1589 b->solve_work = 0; 1590 b->mult_work = 0; 1591 B->spptr = 0; 1592 b->keepzeroedrows = PETSC_FALSE; 1593 b->xtoy = 0; 1594 b->XtoY = 0; 1595 1596 b->inew = 0; 1597 b->jnew = 0; 1598 b->anew = 0; 1599 b->a2anew = 0; 1600 b->permute = PETSC_FALSE; 1601 1602 b->ignore_ltriangular = PETSC_FALSE; 1603 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_ignore_lower_triangular",&flg);CHKERRQ(ierr); 1604 if (flg) b->ignore_ltriangular = PETSC_TRUE; 1605 1606 b->getrow_utriangular = PETSC_FALSE; 1607 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_getrow_uppertriangular",&flg);CHKERRQ(ierr); 1608 if (flg) b->getrow_utriangular = PETSC_TRUE; 1609 1610 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1611 "MatStoreValues_SeqSBAIJ", 1612 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1613 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1614 "MatRetrieveValues_SeqSBAIJ", 1615 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1616 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1617 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1618 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1619 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C", 1620 "MatConvert_SeqSBAIJ_SeqAIJ", 1621 MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr); 1622 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C", 1623 "MatConvert_SeqSBAIJ_SeqBAIJ", 1624 MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr); 1625 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1626 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1627 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1628 1629 B->symmetric = PETSC_TRUE; 1630 B->structurally_symmetric = PETSC_TRUE; 1631 B->symmetric_set = PETSC_TRUE; 1632 B->structurally_symmetric_set = PETSC_TRUE; 1633 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSBAIJ);CHKERRQ(ierr); 1634 PetscFunctionReturn(0); 1635 } 1636 EXTERN_C_END 1637 1638 #undef __FUNCT__ 1639 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1640 /*@C 1641 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1642 compressed row) format. For good matrix assembly performance the 1643 user should preallocate the matrix storage by setting the parameter nz 1644 (or the array nnz). By setting these parameters accurately, performance 1645 during matrix assembly can be increased by more than a factor of 50. 1646 1647 Collective on Mat 1648 1649 Input Parameters: 1650 + A - the symmetric matrix 1651 . bs - size of block 1652 . nz - number of block nonzeros per block row (same for all rows) 1653 - nnz - array containing the number of block nonzeros in the upper triangular plus 1654 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1655 1656 Options Database Keys: 1657 . -mat_no_unroll - uses code that does not unroll the loops in the 1658 block calculations (much slower) 1659 . -mat_block_size - size of the blocks to use 1660 1661 Level: intermediate 1662 1663 Notes: 1664 Specify the preallocated storage with either nz or nnz (not both). 1665 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1666 allocation. For additional details, see the users manual chapter on 1667 matrices. 1668 1669 If the nnz parameter is given then the nz parameter is ignored 1670 1671 1672 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1673 @*/ 1674 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 1675 { 1676 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 1677 1678 PetscFunctionBegin; 1679 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1680 if (f) { 1681 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1682 } 1683 PetscFunctionReturn(0); 1684 } 1685 1686 #undef __FUNCT__ 1687 #define __FUNCT__ "MatCreateSeqSBAIJ" 1688 /*@C 1689 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1690 compressed row) format. For good matrix assembly performance the 1691 user should preallocate the matrix storage by setting the parameter nz 1692 (or the array nnz). By setting these parameters accurately, performance 1693 during matrix assembly can be increased by more than a factor of 50. 1694 1695 Collective on MPI_Comm 1696 1697 Input Parameters: 1698 + comm - MPI communicator, set to PETSC_COMM_SELF 1699 . bs - size of block 1700 . m - number of rows, or number of columns 1701 . nz - number of block nonzeros per block row (same for all rows) 1702 - nnz - array containing the number of block nonzeros in the upper triangular plus 1703 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1704 1705 Output Parameter: 1706 . A - the symmetric matrix 1707 1708 Options Database Keys: 1709 . -mat_no_unroll - uses code that does not unroll the loops in the 1710 block calculations (much slower) 1711 . -mat_block_size - size of the blocks to use 1712 1713 Level: intermediate 1714 1715 Notes: 1716 The number of rows and columns must be divisible by blocksize. 1717 This matrix type does not support complex Hermitian operation. 1718 1719 Specify the preallocated storage with either nz or nnz (not both). 1720 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1721 allocation. For additional details, see the users manual chapter on 1722 matrices. 1723 1724 If the nnz parameter is given then the nz parameter is ignored 1725 1726 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1727 @*/ 1728 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1729 { 1730 PetscErrorCode ierr; 1731 1732 PetscFunctionBegin; 1733 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1734 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 1735 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1736 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 1737 PetscFunctionReturn(0); 1738 } 1739 1740 #undef __FUNCT__ 1741 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1742 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1743 { 1744 Mat C; 1745 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1746 PetscErrorCode ierr; 1747 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1748 1749 PetscFunctionBegin; 1750 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1751 1752 *B = 0; 1753 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 1754 ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr); 1755 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1756 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1757 c = (Mat_SeqSBAIJ*)C->data; 1758 1759 C->preallocated = PETSC_TRUE; 1760 C->factor = A->factor; 1761 c->row = 0; 1762 c->icol = 0; 1763 c->saved_values = 0; 1764 c->keepzeroedrows = a->keepzeroedrows; 1765 C->assembled = PETSC_TRUE; 1766 1767 ierr = PetscMapCopy(A->comm,&A->rmap,&C->rmap);CHKERRQ(ierr); 1768 ierr = PetscMapCopy(A->comm,&A->cmap,&C->cmap);CHKERRQ(ierr); 1769 c->bs2 = a->bs2; 1770 c->mbs = a->mbs; 1771 c->nbs = a->nbs; 1772 1773 ierr = PetscMalloc2((mbs+1),PetscInt,&c->imax,(mbs+1),PetscInt,&c->ilen);CHKERRQ(ierr); 1774 for (i=0; i<mbs; i++) { 1775 c->imax[i] = a->imax[i]; 1776 c->ilen[i] = a->ilen[i]; 1777 } 1778 1779 /* allocate the matrix space */ 1780 ierr = PetscMalloc3(bs2*nz,MatScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 1781 c->singlemalloc = PETSC_TRUE; 1782 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1783 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)));CHKERRQ(ierr); 1784 if (mbs > 0) { 1785 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1786 if (cpvalues == MAT_COPY_VALUES) { 1787 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1788 } else { 1789 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1790 } 1791 } 1792 1793 c->sorted = a->sorted; 1794 c->roworiented = a->roworiented; 1795 c->nonew = a->nonew; 1796 1797 if (a->diag) { 1798 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 1799 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1800 for (i=0; i<mbs; i++) { 1801 c->diag[i] = a->diag[i]; 1802 } 1803 } else c->diag = 0; 1804 c->nz = a->nz; 1805 c->maxnz = a->maxnz; 1806 c->solve_work = 0; 1807 c->mult_work = 0; 1808 c->free_a = PETSC_TRUE; 1809 c->free_ij = PETSC_TRUE; 1810 *B = C; 1811 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1812 PetscFunctionReturn(0); 1813 } 1814 1815 #undef __FUNCT__ 1816 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1817 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer, MatType type,Mat *A) 1818 { 1819 Mat_SeqSBAIJ *a; 1820 Mat B; 1821 PetscErrorCode ierr; 1822 int fd; 1823 PetscMPIInt size; 1824 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 1825 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1826 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 1827 PetscInt *masked,nmask,tmp,bs2,ishift; 1828 PetscScalar *aa; 1829 MPI_Comm comm = ((PetscObject)viewer)->comm; 1830 1831 PetscFunctionBegin; 1832 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1833 bs2 = bs*bs; 1834 1835 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1836 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1837 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1838 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1839 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1840 M = header[1]; N = header[2]; nz = header[3]; 1841 1842 if (header[3] < 0) { 1843 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1844 } 1845 1846 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1847 1848 /* 1849 This code adds extra rows to make sure the number of rows is 1850 divisible by the blocksize 1851 */ 1852 mbs = M/bs; 1853 extra_rows = bs - M + bs*(mbs); 1854 if (extra_rows == bs) extra_rows = 0; 1855 else mbs++; 1856 if (extra_rows) { 1857 ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 1858 } 1859 1860 /* read in row lengths */ 1861 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1862 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1863 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1864 1865 /* read in column indices */ 1866 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1867 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1868 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1869 1870 /* loop over row lengths determining block row lengths */ 1871 ierr = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr); 1872 ierr = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1873 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 1874 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1875 masked = mask + mbs; 1876 rowcount = 0; nzcount = 0; 1877 for (i=0; i<mbs; i++) { 1878 nmask = 0; 1879 for (j=0; j<bs; j++) { 1880 kmax = rowlengths[rowcount]; 1881 for (k=0; k<kmax; k++) { 1882 tmp = jj[nzcount++]/bs; /* block col. index */ 1883 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1884 } 1885 rowcount++; 1886 } 1887 s_browlengths[i] += nmask; 1888 1889 /* zero out the mask elements we set */ 1890 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1891 } 1892 1893 /* create our matrix */ 1894 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 1895 ierr = MatSetSizes(B,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 1896 ierr = MatSetType(B,type);CHKERRQ(ierr); 1897 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,s_browlengths);CHKERRQ(ierr); 1898 a = (Mat_SeqSBAIJ*)B->data; 1899 1900 /* set matrix "i" values */ 1901 a->i[0] = 0; 1902 for (i=1; i<= mbs; i++) { 1903 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1904 a->ilen[i-1] = s_browlengths[i-1]; 1905 } 1906 a->nz = a->i[mbs]; 1907 1908 /* read in nonzero values */ 1909 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1910 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1911 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1912 1913 /* set "a" and "j" values into matrix */ 1914 nzcount = 0; jcount = 0; 1915 for (i=0; i<mbs; i++) { 1916 nzcountb = nzcount; 1917 nmask = 0; 1918 for (j=0; j<bs; j++) { 1919 kmax = rowlengths[i*bs+j]; 1920 for (k=0; k<kmax; k++) { 1921 tmp = jj[nzcount++]/bs; /* block col. index */ 1922 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1923 } 1924 } 1925 /* sort the masked values */ 1926 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1927 1928 /* set "j" values into matrix */ 1929 maskcount = 1; 1930 for (j=0; j<nmask; j++) { 1931 a->j[jcount++] = masked[j]; 1932 mask[masked[j]] = maskcount++; 1933 } 1934 1935 /* set "a" values into matrix */ 1936 ishift = bs2*a->i[i]; 1937 for (j=0; j<bs; j++) { 1938 kmax = rowlengths[i*bs+j]; 1939 for (k=0; k<kmax; k++) { 1940 tmp = jj[nzcountb]/bs ; /* block col. index */ 1941 if (tmp >= i){ 1942 block = mask[tmp] - 1; 1943 point = jj[nzcountb] - bs*tmp; 1944 idx = ishift + bs2*block + j + bs*point; 1945 a->a[idx] = aa[nzcountb]; 1946 } 1947 nzcountb++; 1948 } 1949 } 1950 /* zero out the mask elements we set */ 1951 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1952 } 1953 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1954 1955 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1956 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1957 ierr = PetscFree(aa);CHKERRQ(ierr); 1958 ierr = PetscFree(jj);CHKERRQ(ierr); 1959 ierr = PetscFree(mask);CHKERRQ(ierr); 1960 1961 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1962 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1963 ierr = MatView_Private(B);CHKERRQ(ierr); 1964 *A = B; 1965 PetscFunctionReturn(0); 1966 } 1967 1968 #undef __FUNCT__ 1969 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1970 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1971 { 1972 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1973 MatScalar *aa=a->a,*v,*v1; 1974 PetscScalar *x,*b,*t,sum,d; 1975 PetscErrorCode ierr; 1976 PetscInt m=a->mbs,bs=A->rmap.bs,*ai=a->i,*aj=a->j; 1977 PetscInt nz,nz1,*vj,*vj1,i; 1978 1979 PetscFunctionBegin; 1980 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 1981 1982 its = its*lits; 1983 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1984 1985 if (bs > 1) 1986 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1987 1988 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1989 if (xx != bb) { 1990 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1991 } else { 1992 b = x; 1993 } 1994 1995 if (!a->relax_work) { 1996 ierr = PetscMalloc(m*sizeof(PetscScalar),&a->relax_work);CHKERRQ(ierr); 1997 } 1998 t = a->relax_work; 1999 2000 if (flag & SOR_ZERO_INITIAL_GUESS) { 2001 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2002 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 2003 2004 for (i=0; i<m; i++){ 2005 d = *(aa + ai[i]); /* diag[i] */ 2006 v = aa + ai[i] + 1; 2007 vj = aj + ai[i] + 1; 2008 nz = ai[i+1] - ai[i] - 1; 2009 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2010 x[i] = omega*t[i]/d; 2011 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 2012 } 2013 } 2014 2015 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2016 if (!(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)){ 2017 t = b; 2018 } 2019 2020 for (i=m-1; i>=0; i--){ 2021 d = *(aa + ai[i]); 2022 v = aa + ai[i] + 1; 2023 vj = aj + ai[i] + 1; 2024 nz = ai[i+1] - ai[i] - 1; 2025 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2026 sum = t[i]; 2027 while (nz--) sum -= x[*vj++]*(*v++); 2028 x[i] = (1-omega)*x[i] + omega*sum/d; 2029 } 2030 t = a->relax_work; 2031 } 2032 its--; 2033 } 2034 2035 while (its--) { 2036 /* 2037 forward sweep: 2038 for i=0,...,m-1: 2039 sum[i] = (b[i] - U(i,:)x )/d[i]; 2040 x[i] = (1-omega)x[i] + omega*sum[i]; 2041 b = b - x[i]*U^T(i,:); 2042 2043 */ 2044 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2045 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 2046 2047 for (i=0; i<m; i++){ 2048 d = *(aa + ai[i]); /* diag[i] */ 2049 v = aa + ai[i] + 1; v1=v; 2050 vj = aj + ai[i] + 1; vj1=vj; 2051 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2052 sum = t[i]; 2053 ierr = PetscLogFlops(4*nz-2);CHKERRQ(ierr); 2054 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2055 x[i] = (1-omega)*x[i] + omega*sum/d; 2056 while (nz--) t[*vj++] -= x[i]*(*v++); 2057 } 2058 } 2059 2060 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2061 /* 2062 backward sweep: 2063 b = b - x[i]*U^T(i,:), i=0,...,n-2 2064 for i=m-1,...,0: 2065 sum[i] = (b[i] - U(i,:)x )/d[i]; 2066 x[i] = (1-omega)x[i] + omega*sum[i]; 2067 */ 2068 /* if there was a forward sweep done above then I thing the next two for loops are not needed */ 2069 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 2070 2071 for (i=0; i<m-1; i++){ /* update rhs */ 2072 v = aa + ai[i] + 1; 2073 vj = aj + ai[i] + 1; 2074 nz = ai[i+1] - ai[i] - 1; 2075 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2076 while (nz--) t[*vj++] -= x[i]*(*v++); 2077 } 2078 for (i=m-1; i>=0; i--){ 2079 d = *(aa + ai[i]); 2080 v = aa + ai[i] + 1; 2081 vj = aj + ai[i] + 1; 2082 nz = ai[i+1] - ai[i] - 1; 2083 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2084 sum = t[i]; 2085 while (nz--) sum -= x[*vj++]*(*v++); 2086 x[i] = (1-omega)*x[i] + omega*sum/d; 2087 } 2088 } 2089 } 2090 2091 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2092 if (bb != xx) { 2093 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2094 } 2095 PetscFunctionReturn(0); 2096 } 2097 2098 #undef __FUNCT__ 2099 #define __FUNCT__ "MatCreateSeqSBAIJWithArrays" 2100 /*@ 2101 MatCreateSeqSBAIJWithArrays - Creates an sequential SBAIJ matrix using matrix elements 2102 (upper triangular entries in CSR format) provided by the user. 2103 2104 Collective on MPI_Comm 2105 2106 Input Parameters: 2107 + comm - must be an MPI communicator of size 1 2108 . bs - size of block 2109 . m - number of rows 2110 . n - number of columns 2111 . i - row indices 2112 . j - column indices 2113 - a - matrix values 2114 2115 Output Parameter: 2116 . mat - the matrix 2117 2118 Level: intermediate 2119 2120 Notes: 2121 The i, j, and a arrays are not copied by this routine, the user must free these arrays 2122 once the matrix is destroyed 2123 2124 You cannot set new nonzero locations into this matrix, that will generate an error. 2125 2126 The i and j indices are 0 based 2127 2128 .seealso: MatCreate(), MatCreateMPISBAIJ(), MatCreateSeqSBAIJ() 2129 2130 @*/ 2131 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 2132 { 2133 PetscErrorCode ierr; 2134 PetscInt ii; 2135 Mat_SeqSBAIJ *sbaij; 2136 2137 PetscFunctionBegin; 2138 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 2139 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2140 2141 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2142 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 2143 ierr = MatSetType(*mat,MATSEQSBAIJ);CHKERRQ(ierr); 2144 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 2145 sbaij = (Mat_SeqSBAIJ*)(*mat)->data; 2146 ierr = PetscMalloc2(m,PetscInt,&sbaij->imax,m,PetscInt,&sbaij->ilen);CHKERRQ(ierr); 2147 2148 sbaij->i = i; 2149 sbaij->j = j; 2150 sbaij->a = a; 2151 sbaij->singlemalloc = PETSC_FALSE; 2152 sbaij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 2153 sbaij->free_a = PETSC_FALSE; 2154 sbaij->free_ij = PETSC_FALSE; 2155 2156 for (ii=0; ii<m; ii++) { 2157 sbaij->ilen[ii] = sbaij->imax[ii] = i[ii+1] - i[ii]; 2158 #if defined(PETSC_USE_DEBUG) 2159 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 2160 #endif 2161 } 2162 #if defined(PETSC_USE_DEBUG) 2163 for (ii=0; ii<sbaij->i[m]; ii++) { 2164 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 2165 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 2166 } 2167 #endif 2168 2169 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2170 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2171 PetscFunctionReturn(0); 2172 } 2173 2174 2175 2176 2177 2178