1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the SBAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "src/mat/impls/baij/seq/baij.h" /*I "petscmat.h" I*/ 8 #include "src/inline/spops.h" 9 #include "src/mat/impls/sbaij/seq/sbaij.h" 10 11 #define CHUNKSIZE 10 12 13 /* 14 Checks for missing diagonals 15 */ 16 #undef __FUNCT__ 17 #define __FUNCT__ "MatMissingDiagonal_SeqSBAIJ" 18 PetscErrorCode MatMissingDiagonal_SeqSBAIJ(Mat A) 19 { 20 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 21 PetscErrorCode ierr; 22 PetscInt *diag,*jj = a->j,i; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqSBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 for (i=0; i<a->mbs; i++) { 28 if (jj[diag[i]] != i) SETERRQ1(PETSC_ERR_ARG_CORRUPT,"Matrix is missing diagonal number %D",i); 29 } 30 PetscFunctionReturn(0); 31 } 32 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatMarkDiagonal_SeqSBAIJ" 35 PetscErrorCode MatMarkDiagonal_SeqSBAIJ(Mat A) 36 { 37 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 38 PetscErrorCode ierr; 39 PetscInt i; 40 41 PetscFunctionBegin; 42 if (!a->diag) { 43 ierr = PetscMalloc(a->mbs*sizeof(PetscInt),&a->diag);CHKERRQ(ierr); 44 } 45 for (i=0; i<a->mbs; i++) a->diag[i] = a->i[i]; 46 PetscFunctionReturn(0); 47 } 48 49 #undef __FUNCT__ 50 #define __FUNCT__ "MatGetRowIJ_SeqSBAIJ" 51 static PetscErrorCode MatGetRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 52 { 53 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 54 PetscInt i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs; 55 PetscErrorCode ierr; 56 57 PetscFunctionBegin; 58 *nn = n; 59 if (!ia) PetscFunctionReturn(0); 60 if (!blockcompressed) { 61 /* malloc & create the natural set of indices */ 62 ierr = PetscMalloc2((n+1)*bs,PetscInt,ia,nz*bs,PetscInt,ja);CHKERRQ(ierr); 63 for (i=0; i<n+1; i++) { 64 for (j=0; j<bs; j++) { 65 *ia[i*bs+j] = a->i[i]*bs+j+oshift; 66 } 67 } 68 for (i=0; i<nz; i++) { 69 for (j=0; j<bs; j++) { 70 *ja[i*bs+j] = a->j[i]*bs+j+oshift; 71 } 72 } 73 } else { /* blockcompressed */ 74 if (oshift == 1) { 75 /* temporarily add 1 to i and j indices */ 76 for (i=0; i<nz; i++) a->j[i]++; 77 for (i=0; i<n+1; i++) a->i[i]++; 78 } 79 *ia = a->i; *ja = a->j; 80 } 81 82 PetscFunctionReturn(0); 83 } 84 85 #undef __FUNCT__ 86 #define __FUNCT__ "MatRestoreRowIJ_SeqSBAIJ" 87 static PetscErrorCode MatRestoreRowIJ_SeqSBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done) 88 { 89 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 90 PetscInt i,n = a->mbs,nz = a->i[n]; 91 PetscErrorCode ierr; 92 93 PetscFunctionBegin; 94 if (!ia) PetscFunctionReturn(0); 95 96 if (!blockcompressed) { 97 ierr = PetscFree2(*ia,*ja);CHKERRQ(ierr); 98 } else if (oshift == 1) { /* blockcompressed */ 99 for (i=0; i<nz; i++) a->j[i]--; 100 for (i=0; i<n+1; i++) a->i[i]--; 101 } 102 103 PetscFunctionReturn(0); 104 } 105 106 #undef __FUNCT__ 107 #define __FUNCT__ "MatDestroy_SeqSBAIJ" 108 PetscErrorCode MatDestroy_SeqSBAIJ(Mat A) 109 { 110 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 111 PetscErrorCode ierr; 112 113 PetscFunctionBegin; 114 #if defined(PETSC_USE_LOG) 115 PetscLogObjectState((PetscObject)A,"Rows=%D, NZ=%D",A->rmap.N,a->nz); 116 #endif 117 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 118 if (a->row) {ierr = ISDestroy(a->row);CHKERRQ(ierr);} 119 if (a->col){ierr = ISDestroy(a->col);CHKERRQ(ierr);} 120 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 121 ierr = PetscFree(a->diag);CHKERRQ(ierr); 122 ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr); 123 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 124 ierr = PetscFree(a->relax_work);CHKERRQ(ierr); 125 ierr = PetscFree(a->solves_work);CHKERRQ(ierr); 126 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 127 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 128 ierr = PetscFree(a->xtoy);CHKERRQ(ierr); 129 130 ierr = PetscFree(a->inew);CHKERRQ(ierr); 131 ierr = PetscFree(a);CHKERRQ(ierr); 132 133 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 134 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr); 135 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr); 136 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr); 137 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr); 138 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqsbaij_seqbaij_C","",PETSC_NULL);CHKERRQ(ierr); 139 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr); 140 PetscFunctionReturn(0); 141 } 142 143 #undef __FUNCT__ 144 #define __FUNCT__ "MatSetOption_SeqSBAIJ" 145 PetscErrorCode MatSetOption_SeqSBAIJ(Mat A,MatOption op) 146 { 147 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 148 PetscErrorCode ierr; 149 150 PetscFunctionBegin; 151 switch (op) { 152 case MAT_ROW_ORIENTED: 153 a->roworiented = PETSC_TRUE; 154 break; 155 case MAT_COLUMN_ORIENTED: 156 a->roworiented = PETSC_FALSE; 157 break; 158 case MAT_COLUMNS_SORTED: 159 a->sorted = PETSC_TRUE; 160 break; 161 case MAT_COLUMNS_UNSORTED: 162 a->sorted = PETSC_FALSE; 163 break; 164 case MAT_KEEP_ZEROED_ROWS: 165 a->keepzeroedrows = PETSC_TRUE; 166 break; 167 case MAT_NO_NEW_NONZERO_LOCATIONS: 168 a->nonew = 1; 169 break; 170 case MAT_NEW_NONZERO_LOCATION_ERR: 171 a->nonew = -1; 172 break; 173 case MAT_NEW_NONZERO_ALLOCATION_ERR: 174 a->nonew = -2; 175 break; 176 case MAT_YES_NEW_NONZERO_LOCATIONS: 177 a->nonew = 0; 178 break; 179 case MAT_ROWS_SORTED: 180 case MAT_ROWS_UNSORTED: 181 case MAT_YES_NEW_DIAGONALS: 182 case MAT_IGNORE_OFF_PROC_ENTRIES: 183 case MAT_USE_HASH_TABLE: 184 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 185 break; 186 case MAT_NO_NEW_DIAGONALS: 187 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 188 case MAT_NOT_SYMMETRIC: 189 case MAT_NOT_STRUCTURALLY_SYMMETRIC: 190 case MAT_HERMITIAN: 191 SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric"); 192 case MAT_SYMMETRIC: 193 case MAT_STRUCTURALLY_SYMMETRIC: 194 case MAT_NOT_HERMITIAN: 195 case MAT_SYMMETRY_ETERNAL: 196 case MAT_NOT_SYMMETRY_ETERNAL: 197 ierr = PetscInfo1(A,"Option %s not relevent\n",MatOptions[op]);CHKERRQ(ierr); 198 break; 199 case MAT_IGNORE_LOWER_TRIANGULAR: 200 a->ignore_ltriangular = PETSC_TRUE; 201 break; 202 case MAT_ERROR_LOWER_TRIANGULAR: 203 a->ignore_ltriangular = PETSC_FALSE; 204 break; 205 case MAT_GETROW_UPPERTRIANGULAR: 206 a->getrow_utriangular = PETSC_TRUE; 207 break; 208 default: 209 SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op); 210 } 211 PetscFunctionReturn(0); 212 } 213 214 #undef __FUNCT__ 215 #define __FUNCT__ "MatGetRow_SeqSBAIJ" 216 PetscErrorCode MatGetRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **v) 217 { 218 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 219 PetscErrorCode ierr; 220 PetscInt itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*cols_i,bs2; 221 MatScalar *aa,*aa_i; 222 PetscScalar *v_i; 223 224 PetscFunctionBegin; 225 if (A && !a->getrow_utriangular) SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format. Getting the upper triangular part of row, run with -mat_getrow_uppertriangular, call MatSetOption(mat,MAT_GETROW_UPPERTRIANGULAR) or MatGetRowUpperTriangular()"); 226 /* Get the upper triangular part of the row */ 227 bs = A->rmap.bs; 228 ai = a->i; 229 aj = a->j; 230 aa = a->a; 231 bs2 = a->bs2; 232 233 if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Row %D out of range", row); 234 235 bn = row/bs; /* Block number */ 236 bp = row % bs; /* Block position */ 237 M = ai[bn+1] - ai[bn]; 238 *ncols = bs*M; 239 240 if (v) { 241 *v = 0; 242 if (*ncols) { 243 ierr = PetscMalloc((*ncols+row)*sizeof(PetscScalar),v);CHKERRQ(ierr); 244 for (i=0; i<M; i++) { /* for each block in the block row */ 245 v_i = *v + i*bs; 246 aa_i = aa + bs2*(ai[bn] + i); 247 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 248 } 249 } 250 } 251 252 if (cols) { 253 *cols = 0; 254 if (*ncols) { 255 ierr = PetscMalloc((*ncols+row)*sizeof(PetscInt),cols);CHKERRQ(ierr); 256 for (i=0; i<M; i++) { /* for each block in the block row */ 257 cols_i = *cols + i*bs; 258 itmp = bs*aj[ai[bn] + i]; 259 for (j=0; j<bs; j++) {cols_i[j] = itmp++;} 260 } 261 } 262 } 263 264 /*search column A(0:row-1,row) (=A(row,0:row-1)). Could be expensive! */ 265 /* this segment is currently removed, so only entries in the upper triangle are obtained */ 266 #ifdef column_search 267 v_i = *v + M*bs; 268 cols_i = *cols + M*bs; 269 for (i=0; i<bn; i++){ /* for each block row */ 270 M = ai[i+1] - ai[i]; 271 for (j=0; j<M; j++){ 272 itmp = aj[ai[i] + j]; /* block column value */ 273 if (itmp == bn){ 274 aa_i = aa + bs2*(ai[i] + j) + bs*bp; 275 for (k=0; k<bs; k++) { 276 *cols_i++ = i*bs+k; 277 *v_i++ = aa_i[k]; 278 } 279 *ncols += bs; 280 break; 281 } 282 } 283 } 284 #endif 285 PetscFunctionReturn(0); 286 } 287 288 #undef __FUNCT__ 289 #define __FUNCT__ "MatRestoreRow_SeqSBAIJ" 290 PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 291 { 292 PetscErrorCode ierr; 293 294 PetscFunctionBegin; 295 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 296 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 297 PetscFunctionReturn(0); 298 } 299 300 #undef __FUNCT__ 301 #define __FUNCT__ "MatGetRowUpperTriangular_SeqSBAIJ" 302 PetscErrorCode MatGetRowUpperTriangular_SeqSBAIJ(Mat A) 303 { 304 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 305 306 PetscFunctionBegin; 307 a->getrow_utriangular = PETSC_TRUE; 308 PetscFunctionReturn(0); 309 } 310 #undef __FUNCT__ 311 #define __FUNCT__ "MatRestoreRowUpperTriangular_SeqSBAIJ" 312 PetscErrorCode MatRestoreRowUpperTriangular_SeqSBAIJ(Mat A) 313 { 314 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 315 316 PetscFunctionBegin; 317 a->getrow_utriangular = PETSC_FALSE; 318 PetscFunctionReturn(0); 319 } 320 321 #undef __FUNCT__ 322 #define __FUNCT__ "MatTranspose_SeqSBAIJ" 323 PetscErrorCode MatTranspose_SeqSBAIJ(Mat A,Mat *B) 324 { 325 PetscErrorCode ierr; 326 PetscFunctionBegin; 327 ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr); 328 PetscFunctionReturn(0); 329 } 330 331 #undef __FUNCT__ 332 #define __FUNCT__ "MatView_SeqSBAIJ_ASCII" 333 static PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat A,PetscViewer viewer) 334 { 335 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 336 PetscErrorCode ierr; 337 PetscInt i,j,bs = A->rmap.bs,k,l,bs2=a->bs2; 338 const char *name; 339 PetscViewerFormat format; 340 341 PetscFunctionBegin; 342 ierr = PetscObjectGetName((PetscObject)A,&name);CHKERRQ(ierr); 343 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 344 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 345 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 346 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 347 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 348 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 349 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 350 for (i=0; i<a->mbs; i++) { 351 for (j=0; j<bs; j++) { 352 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 353 for (k=a->i[i]; k<a->i[i+1]; k++) { 354 for (l=0; l<bs; l++) { 355 #if defined(PETSC_USE_COMPLEX) 356 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 357 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 358 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 359 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 360 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 361 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 362 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 363 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 364 } 365 #else 366 if (a->a[bs2*k + l*bs + j] != 0.0) { 367 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 368 } 369 #endif 370 } 371 } 372 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 373 } 374 } 375 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 376 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 377 PetscFunctionReturn(0); 378 } else { 379 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 380 for (i=0; i<a->mbs; i++) { 381 for (j=0; j<bs; j++) { 382 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 383 for (k=a->i[i]; k<a->i[i+1]; k++) { 384 for (l=0; l<bs; l++) { 385 #if defined(PETSC_USE_COMPLEX) 386 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 387 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l, 388 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 389 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 390 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l, 391 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 392 } else { 393 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 394 } 395 #else 396 ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 397 #endif 398 } 399 } 400 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 401 } 402 } 403 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 404 } 405 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 406 PetscFunctionReturn(0); 407 } 408 409 #undef __FUNCT__ 410 #define __FUNCT__ "MatView_SeqSBAIJ_Draw_Zoom" 411 static PetscErrorCode MatView_SeqSBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 412 { 413 Mat A = (Mat) Aa; 414 Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)A->data; 415 PetscErrorCode ierr; 416 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2; 417 PetscMPIInt rank; 418 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 419 MatScalar *aa; 420 MPI_Comm comm; 421 PetscViewer viewer; 422 423 PetscFunctionBegin; 424 /* 425 This is nasty. If this is called from an originally parallel matrix 426 then all processes call this,but only the first has the matrix so the 427 rest should return immediately. 428 */ 429 ierr = PetscObjectGetComm((PetscObject)draw,&comm);CHKERRQ(ierr); 430 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 431 if (rank) PetscFunctionReturn(0); 432 433 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 434 435 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 436 PetscDrawString(draw, .3*(xl+xr), .3*(yl+yr), PETSC_DRAW_BLACK, "symmetric"); 437 438 /* loop over matrix elements drawing boxes */ 439 color = PETSC_DRAW_BLUE; 440 for (i=0,row=0; i<mbs; i++,row+=bs) { 441 for (j=a->i[i]; j<a->i[i+1]; j++) { 442 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 443 x_l = a->j[j]*bs; x_r = x_l + 1.0; 444 aa = a->a + j*bs2; 445 for (k=0; k<bs; k++) { 446 for (l=0; l<bs; l++) { 447 if (PetscRealPart(*aa++) >= 0.) continue; 448 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 449 } 450 } 451 } 452 } 453 color = PETSC_DRAW_CYAN; 454 for (i=0,row=0; i<mbs; i++,row+=bs) { 455 for (j=a->i[i]; j<a->i[i+1]; j++) { 456 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 457 x_l = a->j[j]*bs; x_r = x_l + 1.0; 458 aa = a->a + j*bs2; 459 for (k=0; k<bs; k++) { 460 for (l=0; l<bs; l++) { 461 if (PetscRealPart(*aa++) != 0.) continue; 462 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 463 } 464 } 465 } 466 } 467 468 color = PETSC_DRAW_RED; 469 for (i=0,row=0; i<mbs; i++,row+=bs) { 470 for (j=a->i[i]; j<a->i[i+1]; j++) { 471 y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0; 472 x_l = a->j[j]*bs; x_r = x_l + 1.0; 473 aa = a->a + j*bs2; 474 for (k=0; k<bs; k++) { 475 for (l=0; l<bs; l++) { 476 if (PetscRealPart(*aa++) <= 0.) continue; 477 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 478 } 479 } 480 } 481 } 482 PetscFunctionReturn(0); 483 } 484 485 #undef __FUNCT__ 486 #define __FUNCT__ "MatView_SeqSBAIJ_Draw" 487 static PetscErrorCode MatView_SeqSBAIJ_Draw(Mat A,PetscViewer viewer) 488 { 489 PetscErrorCode ierr; 490 PetscReal xl,yl,xr,yr,w,h; 491 PetscDraw draw; 492 PetscTruth isnull; 493 494 PetscFunctionBegin; 495 496 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 497 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 498 499 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 500 xr = A->rmap.N; yr = A->rmap.N; h = yr/10.0; w = xr/10.0; 501 xr += w; yr += h; xl = -w; yl = -h; 502 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 503 ierr = PetscDrawZoom(draw,MatView_SeqSBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 504 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 505 PetscFunctionReturn(0); 506 } 507 508 #undef __FUNCT__ 509 #define __FUNCT__ "MatView_SeqSBAIJ" 510 PetscErrorCode MatView_SeqSBAIJ(Mat A,PetscViewer viewer) 511 { 512 PetscErrorCode ierr; 513 PetscTruth iascii,isdraw; 514 515 PetscFunctionBegin; 516 if (A->factor){ 517 ierr = PetscPrintf(PETSC_COMM_SELF,"Warning: matrix is factored. MatView_SeqSBAIJ() may not display complete or logically correct entries!\n");CHKERRQ(ierr); 518 } 519 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 520 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 521 if (iascii){ 522 ierr = MatView_SeqSBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 523 } else if (isdraw) { 524 ierr = MatView_SeqSBAIJ_Draw(A,viewer);CHKERRQ(ierr); 525 } else { 526 Mat B; 527 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 528 ierr = MatView(B,viewer);CHKERRQ(ierr); 529 ierr = MatDestroy(B);CHKERRQ(ierr); 530 } 531 PetscFunctionReturn(0); 532 } 533 534 535 #undef __FUNCT__ 536 #define __FUNCT__ "MatGetValues_SeqSBAIJ" 537 PetscErrorCode MatGetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 538 { 539 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 540 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 541 PetscInt *ai = a->i,*ailen = a->ilen; 542 PetscInt brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2; 543 MatScalar *ap,*aa = a->a,zero = 0.0; 544 545 PetscFunctionBegin; 546 for (k=0; k<m; k++) { /* loop over rows */ 547 row = im[k]; brow = row/bs; 548 if (row < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",row); 549 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 550 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 551 nrow = ailen[brow]; 552 for (l=0; l<n; l++) { /* loop over columns */ 553 if (in[l] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",in[l]); 554 if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1); 555 col = in[l] ; 556 bcol = col/bs; 557 cidx = col%bs; 558 ridx = row%bs; 559 high = nrow; 560 low = 0; /* assume unsorted */ 561 while (high-low > 5) { 562 t = (low+high)/2; 563 if (rp[t] > bcol) high = t; 564 else low = t; 565 } 566 for (i=low; i<high; i++) { 567 if (rp[i] > bcol) break; 568 if (rp[i] == bcol) { 569 *v++ = ap[bs2*i+bs*cidx+ridx]; 570 goto finished; 571 } 572 } 573 *v++ = zero; 574 finished:; 575 } 576 } 577 PetscFunctionReturn(0); 578 } 579 580 581 #undef __FUNCT__ 582 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ" 583 PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 584 { 585 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 586 PetscErrorCode ierr; 587 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 588 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 589 PetscInt *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval; 590 PetscTruth roworiented=a->roworiented; 591 const MatScalar *value = v; 592 MatScalar *ap,*aa = a->a,*bap; 593 594 PetscFunctionBegin; 595 if (roworiented) { 596 stepval = (n-1)*bs; 597 } else { 598 stepval = (m-1)*bs; 599 } 600 for (k=0; k<m; k++) { /* loop over added rows */ 601 row = im[k]; 602 if (row < 0) continue; 603 #if defined(PETSC_USE_DEBUG) 604 if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1); 605 #endif 606 rp = aj + ai[row]; 607 ap = aa + bs2*ai[row]; 608 rmax = imax[row]; 609 nrow = ailen[row]; 610 low = 0; 611 high = nrow; 612 for (l=0; l<n; l++) { /* loop over added columns */ 613 if (in[l] < 0) continue; 614 col = in[l]; 615 #if defined(PETSC_USE_DEBUG) 616 if (col >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",col,a->nbs-1); 617 #endif 618 if (col < row) continue; /* ignore lower triangular block */ 619 if (roworiented) { 620 value = v + k*(stepval+bs)*bs + l*bs; 621 } else { 622 value = v + l*(stepval+bs)*bs + k*bs; 623 } 624 if (col <= lastcol) low = 0; else high = nrow; 625 lastcol = col; 626 while (high-low > 7) { 627 t = (low+high)/2; 628 if (rp[t] > col) high = t; 629 else low = t; 630 } 631 for (i=low; i<high; i++) { 632 if (rp[i] > col) break; 633 if (rp[i] == col) { 634 bap = ap + bs2*i; 635 if (roworiented) { 636 if (is == ADD_VALUES) { 637 for (ii=0; ii<bs; ii++,value+=stepval) { 638 for (jj=ii; jj<bs2; jj+=bs) { 639 bap[jj] += *value++; 640 } 641 } 642 } else { 643 for (ii=0; ii<bs; ii++,value+=stepval) { 644 for (jj=ii; jj<bs2; jj+=bs) { 645 bap[jj] = *value++; 646 } 647 } 648 } 649 } else { 650 if (is == ADD_VALUES) { 651 for (ii=0; ii<bs; ii++,value+=stepval) { 652 for (jj=0; jj<bs; jj++) { 653 *bap++ += *value++; 654 } 655 } 656 } else { 657 for (ii=0; ii<bs; ii++,value+=stepval) { 658 for (jj=0; jj<bs; jj++) { 659 *bap++ = *value++; 660 } 661 } 662 } 663 } 664 goto noinsert2; 665 } 666 } 667 if (nonew == 1) goto noinsert2; 668 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 669 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 670 N = nrow++ - 1; high++; 671 /* shift up all the later entries in this row */ 672 for (ii=N; ii>=i; ii--) { 673 rp[ii+1] = rp[ii]; 674 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 675 } 676 if (N >= i) { 677 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 678 } 679 rp[i] = col; 680 bap = ap + bs2*i; 681 if (roworiented) { 682 for (ii=0; ii<bs; ii++,value+=stepval) { 683 for (jj=ii; jj<bs2; jj+=bs) { 684 bap[jj] = *value++; 685 } 686 } 687 } else { 688 for (ii=0; ii<bs; ii++,value+=stepval) { 689 for (jj=0; jj<bs; jj++) { 690 *bap++ = *value++; 691 } 692 } 693 } 694 noinsert2:; 695 low = i; 696 } 697 ailen[row] = nrow; 698 } 699 PetscFunctionReturn(0); 700 } 701 702 #undef __FUNCT__ 703 #define __FUNCT__ "MatAssemblyEnd_SeqSBAIJ" 704 PetscErrorCode MatAssemblyEnd_SeqSBAIJ(Mat A,MatAssemblyType mode) 705 { 706 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 707 PetscErrorCode ierr; 708 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 709 PetscInt m = A->rmap.N,*ip,N,*ailen = a->ilen; 710 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 711 MatScalar *aa = a->a,*ap; 712 713 PetscFunctionBegin; 714 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 715 716 if (m) rmax = ailen[0]; 717 for (i=1; i<mbs; i++) { 718 /* move each row back by the amount of empty slots (fshift) before it*/ 719 fshift += imax[i-1] - ailen[i-1]; 720 rmax = PetscMax(rmax,ailen[i]); 721 if (fshift) { 722 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 723 N = ailen[i]; 724 for (j=0; j<N; j++) { 725 ip[j-fshift] = ip[j]; 726 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 727 } 728 } 729 ai[i] = ai[i-1] + ailen[i-1]; 730 } 731 if (mbs) { 732 fshift += imax[mbs-1] - ailen[mbs-1]; 733 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 734 } 735 /* reset ilen and imax for each row */ 736 for (i=0; i<mbs; i++) { 737 ailen[i] = imax[i] = ai[i+1] - ai[i]; 738 } 739 a->nz = ai[mbs]; 740 741 /* diagonals may have moved, reset it */ 742 if (a->diag) { 743 ierr = PetscMemcpy(a->diag,ai,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 744 } 745 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->rmap.N,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 746 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 747 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 748 a->reallocs = 0; 749 A->info.nz_unneeded = (PetscReal)fshift*bs2; 750 PetscFunctionReturn(0); 751 } 752 753 /* 754 This function returns an array of flags which indicate the locations of contiguous 755 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 756 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 757 Assume: sizes should be long enough to hold all the values. 758 */ 759 #undef __FUNCT__ 760 #define __FUNCT__ "MatZeroRows_SeqSBAIJ_Check_Blocks" 761 PetscErrorCode MatZeroRows_SeqSBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 762 { 763 PetscInt i,j,k,row; 764 PetscTruth flg; 765 766 PetscFunctionBegin; 767 for (i=0,j=0; i<n; j++) { 768 row = idx[i]; 769 if (row%bs!=0) { /* Not the begining of a block */ 770 sizes[j] = 1; 771 i++; 772 } else if (i+bs > n) { /* Beginning of a block, but complete block doesn't exist (at idx end) */ 773 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 774 i++; 775 } else { /* Begining of the block, so check if the complete block exists */ 776 flg = PETSC_TRUE; 777 for (k=1; k<bs; k++) { 778 if (row+k != idx[i+k]) { /* break in the block */ 779 flg = PETSC_FALSE; 780 break; 781 } 782 } 783 if (flg) { /* No break in the bs */ 784 sizes[j] = bs; 785 i+= bs; 786 } else { 787 sizes[j] = 1; 788 i++; 789 } 790 } 791 } 792 *bs_max = j; 793 PetscFunctionReturn(0); 794 } 795 796 797 /* Only add/insert a(i,j) with i<=j (blocks). 798 Any a(i,j) with i>j input by user is ingored. 799 */ 800 801 #undef __FUNCT__ 802 #define __FUNCT__ "MatSetValues_SeqSBAIJ" 803 PetscErrorCode MatSetValues_SeqSBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 804 { 805 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 806 PetscErrorCode ierr; 807 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 808 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 809 PetscInt *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol; 810 PetscInt ridx,cidx,bs2=a->bs2; 811 MatScalar *ap,value,*aa=a->a,*bap; 812 813 PetscFunctionBegin; 814 for (k=0; k<m; k++) { /* loop over added rows */ 815 row = im[k]; /* row number */ 816 brow = row/bs; /* block row number */ 817 if (row < 0) continue; 818 #if defined(PETSC_USE_DEBUG) 819 if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1); 820 #endif 821 rp = aj + ai[brow]; /*ptr to beginning of column value of the row block*/ 822 ap = aa + bs2*ai[brow]; /*ptr to beginning of element value of the row block*/ 823 rmax = imax[brow]; /* maximum space allocated for this row */ 824 nrow = ailen[brow]; /* actual length of this row */ 825 low = 0; 826 827 for (l=0; l<n; l++) { /* loop over added columns */ 828 if (in[l] < 0) continue; 829 #if defined(PETSC_USE_DEBUG) 830 if (in[l] >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->rmap.N-1); 831 #endif 832 col = in[l]; 833 bcol = col/bs; /* block col number */ 834 835 if (brow > bcol) { 836 if (a->ignore_ltriangular){ 837 continue; /* ignore lower triangular values */ 838 } else { 839 SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR)"); 840 } 841 } 842 843 ridx = row % bs; cidx = col % bs; /*row and col index inside the block */ 844 if ((brow==bcol && ridx<=cidx) || (brow<bcol)){ 845 /* element value a(k,l) */ 846 if (roworiented) { 847 value = v[l + k*n]; 848 } else { 849 value = v[k + l*m]; 850 } 851 852 /* move pointer bap to a(k,l) quickly and add/insert value */ 853 if (col <= lastcol) low = 0; high = nrow; 854 lastcol = col; 855 while (high-low > 7) { 856 t = (low+high)/2; 857 if (rp[t] > bcol) high = t; 858 else low = t; 859 } 860 for (i=low; i<high; i++) { 861 if (rp[i] > bcol) break; 862 if (rp[i] == bcol) { 863 bap = ap + bs2*i + bs*cidx + ridx; 864 if (is == ADD_VALUES) *bap += value; 865 else *bap = value; 866 /* for diag block, add/insert its symmetric element a(cidx,ridx) */ 867 if (brow == bcol && ridx < cidx){ 868 bap = ap + bs2*i + bs*ridx + cidx; 869 if (is == ADD_VALUES) *bap += value; 870 else *bap = value; 871 } 872 goto noinsert1; 873 } 874 } 875 876 if (nonew == 1) goto noinsert1; 877 if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 878 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 879 880 N = nrow++ - 1; high++; 881 /* shift up all the later entries in this row */ 882 for (ii=N; ii>=i; ii--) { 883 rp[ii+1] = rp[ii]; 884 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 885 } 886 if (N>=i) { 887 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 888 } 889 rp[i] = bcol; 890 ap[bs2*i + bs*cidx + ridx] = value; 891 noinsert1:; 892 low = i; 893 } 894 } /* end of loop over added columns */ 895 ailen[brow] = nrow; 896 } /* end of loop over added rows */ 897 PetscFunctionReturn(0); 898 } 899 900 #undef __FUNCT__ 901 #define __FUNCT__ "MatICCFactor_SeqSBAIJ" 902 PetscErrorCode MatICCFactor_SeqSBAIJ(Mat inA,IS row,MatFactorInfo *info) 903 { 904 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)inA->data; 905 Mat outA; 906 PetscErrorCode ierr; 907 PetscTruth row_identity; 908 909 PetscFunctionBegin; 910 if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels=0 is supported for in-place icc"); 911 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 912 if (!row_identity) SETERRQ(PETSC_ERR_SUP,"Matrix reordering is not supported"); 913 if (inA->rmap.bs != 1) SETERRQ1(PETSC_ERR_SUP,"Matrix block size %D is not supported",inA->rmap.bs); /* Need to replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR()! */ 914 915 outA = inA; 916 inA->factor = FACTOR_CHOLESKY; 917 918 ierr = MatMarkDiagonal_SeqSBAIJ(inA);CHKERRQ(ierr); 919 /* 920 Blocksize < 8 have a special faster factorization/solver 921 for ICC(0) factorization with natural ordering 922 */ 923 switch (inA->rmap.bs){ /* Note: row_identity = PETSC_TRUE! */ 924 case 1: 925 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering; 926 inA->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 927 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 928 inA->ops->solves = MatSolves_SeqSBAIJ_1; 929 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 930 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 931 ierr = PetscInfo(inA,"Using special in-place natural ordering solvetrans BS=1\n");CHKERRQ(ierr); 932 break; 933 case 2: 934 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering; 935 inA->ops->solve = MatSolve_SeqSBAIJ_2_NaturalOrdering; 936 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering; 937 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering; 938 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering; 939 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=2\n");CHKERRQ(ierr); 940 break; 941 case 3: 942 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3_NaturalOrdering; 943 inA->ops->solve = MatSolve_SeqSBAIJ_3_NaturalOrdering; 944 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_3_NaturalOrdering; 945 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_3_NaturalOrdering; 946 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_3_NaturalOrdering; 947 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=3\n");CHKERRQ(ierr); 948 break; 949 case 4: 950 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4_NaturalOrdering; 951 inA->ops->solve = MatSolve_SeqSBAIJ_4_NaturalOrdering; 952 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_4_NaturalOrdering; 953 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_4_NaturalOrdering; 954 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_4_NaturalOrdering; 955 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=4\n");CHKERRQ(ierr); 956 break; 957 case 5: 958 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5_NaturalOrdering; 959 inA->ops->solve = MatSolve_SeqSBAIJ_5_NaturalOrdering; 960 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_5_NaturalOrdering; 961 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_5_NaturalOrdering; 962 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_5_NaturalOrdering; 963 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=5\n");CHKERRQ(ierr); 964 break; 965 case 6: 966 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6_NaturalOrdering; 967 inA->ops->solve = MatSolve_SeqSBAIJ_6_NaturalOrdering; 968 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_6_NaturalOrdering; 969 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_6_NaturalOrdering; 970 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_6_NaturalOrdering; 971 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=6\n");CHKERRQ(ierr); 972 break; 973 case 7: 974 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7_NaturalOrdering; 975 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_7_NaturalOrdering; 976 inA->ops->solve = MatSolve_SeqSBAIJ_7_NaturalOrdering; 977 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_7_NaturalOrdering; 978 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_7_NaturalOrdering; 979 ierr = PetscInfo(inA,"Using special in-place natural ordering factor and solve BS=7\n");CHKERRQ(ierr); 980 break; 981 default: 982 inA->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering; 983 inA->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering; 984 inA->ops->solve = MatSolve_SeqSBAIJ_N_NaturalOrdering; 985 inA->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering; 986 inA->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering; 987 break; 988 } 989 990 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 991 if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); } 992 a->row = row; 993 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 994 if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); } 995 a->col = row; 996 997 /* Create the invert permutation so that it can be used in MatCholeskyFactorNumeric() */ 998 if (a->icol) {ierr = ISInvertPermutation(row,PETSC_DECIDE, &a->icol);CHKERRQ(ierr);} 999 ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr); 1000 1001 if (!a->solve_work) { 1002 ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 1003 ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr); 1004 } 1005 1006 ierr = MatCholeskyFactorNumeric(inA,info,&outA);CHKERRQ(ierr); 1007 PetscFunctionReturn(0); 1008 } 1009 1010 EXTERN_C_BEGIN 1011 #undef __FUNCT__ 1012 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices_SeqSBAIJ" 1013 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices_SeqSBAIJ(Mat mat,PetscInt *indices) 1014 { 1015 Mat_SeqSBAIJ *baij = (Mat_SeqSBAIJ *)mat->data; 1016 PetscInt i,nz,n; 1017 1018 PetscFunctionBegin; 1019 nz = baij->maxnz; 1020 n = mat->cmap.n; 1021 for (i=0; i<nz; i++) { 1022 baij->j[i] = indices[i]; 1023 } 1024 baij->nz = nz; 1025 for (i=0; i<n; i++) { 1026 baij->ilen[i] = baij->imax[i]; 1027 } 1028 PetscFunctionReturn(0); 1029 } 1030 EXTERN_C_END 1031 1032 #undef __FUNCT__ 1033 #define __FUNCT__ "MatSeqSBAIJSetColumnIndices" 1034 /*@ 1035 MatSeqSBAIJSetColumnIndices - Set the column indices for all the rows 1036 in the matrix. 1037 1038 Input Parameters: 1039 + mat - the SeqSBAIJ matrix 1040 - indices - the column indices 1041 1042 Level: advanced 1043 1044 Notes: 1045 This can be called if you have precomputed the nonzero structure of the 1046 matrix and want to provide it to the matrix object to improve the performance 1047 of the MatSetValues() operation. 1048 1049 You MUST have set the correct numbers of nonzeros per row in the call to 1050 MatCreateSeqSBAIJ(), and the columns indices MUST be sorted. 1051 1052 MUST be called before any calls to MatSetValues() 1053 1054 .seealso: MatCreateSeqSBAIJ 1055 @*/ 1056 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetColumnIndices(Mat mat,PetscInt *indices) 1057 { 1058 PetscErrorCode ierr,(*f)(Mat,PetscInt *); 1059 1060 PetscFunctionBegin; 1061 PetscValidHeaderSpecific(mat,MAT_COOKIE,1); 1062 PetscValidPointer(indices,2); 1063 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqSBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 1064 if (f) { 1065 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1066 } else { 1067 SETERRQ(PETSC_ERR_SUP,"Wrong type of matrix to set column indices"); 1068 } 1069 PetscFunctionReturn(0); 1070 } 1071 1072 #undef __FUNCT__ 1073 #define __FUNCT__ "MatCopy_SeqSBAIJ" 1074 PetscErrorCode MatCopy_SeqSBAIJ(Mat A,Mat B,MatStructure str) 1075 { 1076 PetscErrorCode ierr; 1077 1078 PetscFunctionBegin; 1079 /* If the two matrices have the same copy implementation, use fast copy. */ 1080 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 1081 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1082 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1083 1084 if (a->i[A->rmap.N] != b->i[B->rmap.N]) { 1085 SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different"); 1086 } 1087 ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr); 1088 } else { 1089 ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr); 1090 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 1091 ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr); 1092 } 1093 PetscFunctionReturn(0); 1094 } 1095 1096 #undef __FUNCT__ 1097 #define __FUNCT__ "MatSetUpPreallocation_SeqSBAIJ" 1098 PetscErrorCode MatSetUpPreallocation_SeqSBAIJ(Mat A) 1099 { 1100 PetscErrorCode ierr; 1101 1102 PetscFunctionBegin; 1103 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr); 1104 PetscFunctionReturn(0); 1105 } 1106 1107 #undef __FUNCT__ 1108 #define __FUNCT__ "MatGetArray_SeqSBAIJ" 1109 PetscErrorCode MatGetArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1110 { 1111 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1112 PetscFunctionBegin; 1113 *array = a->a; 1114 PetscFunctionReturn(0); 1115 } 1116 1117 #undef __FUNCT__ 1118 #define __FUNCT__ "MatRestoreArray_SeqSBAIJ" 1119 PetscErrorCode MatRestoreArray_SeqSBAIJ(Mat A,PetscScalar *array[]) 1120 { 1121 PetscFunctionBegin; 1122 PetscFunctionReturn(0); 1123 } 1124 1125 #include "petscblaslapack.h" 1126 #undef __FUNCT__ 1127 #define __FUNCT__ "MatAXPY_SeqSBAIJ" 1128 PetscErrorCode MatAXPY_SeqSBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 1129 { 1130 Mat_SeqSBAIJ *x=(Mat_SeqSBAIJ *)X->data, *y=(Mat_SeqSBAIJ *)Y->data; 1131 PetscErrorCode ierr; 1132 PetscInt i,bs=Y->rmap.bs,bs2,j; 1133 PetscBLASInt bnz = (PetscBLASInt)x->nz,one = 1; 1134 1135 PetscFunctionBegin; 1136 if (str == SAME_NONZERO_PATTERN) { 1137 PetscScalar alpha = a; 1138 BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one); 1139 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1140 if (y->xtoy && y->XtoY != X) { 1141 ierr = PetscFree(y->xtoy);CHKERRQ(ierr); 1142 ierr = MatDestroy(y->XtoY);CHKERRQ(ierr); 1143 } 1144 if (!y->xtoy) { /* get xtoy */ 1145 ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr); 1146 y->XtoY = X; 1147 } 1148 bs2 = bs*bs; 1149 for (i=0; i<x->nz; i++) { 1150 j = 0; 1151 while (j < bs2){ 1152 y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]); 1153 j++; 1154 } 1155 } 1156 ierr = PetscInfo3(0,"ratio of nnz_s(X)/nnz_s(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr); 1157 } else { 1158 ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr); 1159 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 1160 ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr); 1161 } 1162 PetscFunctionReturn(0); 1163 } 1164 1165 #undef __FUNCT__ 1166 #define __FUNCT__ "MatIsSymmetric_SeqSBAIJ" 1167 PetscErrorCode MatIsSymmetric_SeqSBAIJ(Mat A,PetscReal tol,PetscTruth *flg) 1168 { 1169 PetscFunctionBegin; 1170 *flg = PETSC_TRUE; 1171 PetscFunctionReturn(0); 1172 } 1173 1174 #undef __FUNCT__ 1175 #define __FUNCT__ "MatIsStructurallySymmetric_SeqSBAIJ" 1176 PetscErrorCode MatIsStructurallySymmetric_SeqSBAIJ(Mat A,PetscTruth *flg) 1177 { 1178 PetscFunctionBegin; 1179 *flg = PETSC_TRUE; 1180 PetscFunctionReturn(0); 1181 } 1182 1183 #undef __FUNCT__ 1184 #define __FUNCT__ "MatIsHermitian_SeqSBAIJ" 1185 PetscErrorCode MatIsHermitian_SeqSBAIJ(Mat A,PetscTruth *flg) 1186 { 1187 PetscFunctionBegin; 1188 *flg = PETSC_FALSE; 1189 PetscFunctionReturn(0); 1190 } 1191 1192 #undef __FUNCT__ 1193 #define __FUNCT__ "MatRealPart_SeqSBAIJ" 1194 PetscErrorCode MatRealPart_SeqSBAIJ(Mat A) 1195 { 1196 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1197 PetscInt i,nz = a->bs2*a->i[a->mbs]; 1198 PetscScalar *aa = a->a; 1199 1200 PetscFunctionBegin; 1201 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 1202 PetscFunctionReturn(0); 1203 } 1204 1205 #undef __FUNCT__ 1206 #define __FUNCT__ "MatImaginaryPart_SeqSBAIJ" 1207 PetscErrorCode MatImaginaryPart_SeqSBAIJ(Mat A) 1208 { 1209 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1210 PetscInt i,nz = a->bs2*a->i[a->mbs]; 1211 PetscScalar *aa = a->a; 1212 1213 PetscFunctionBegin; 1214 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 1215 PetscFunctionReturn(0); 1216 } 1217 1218 /* -------------------------------------------------------------------*/ 1219 static struct _MatOps MatOps_Values = {MatSetValues_SeqSBAIJ, 1220 MatGetRow_SeqSBAIJ, 1221 MatRestoreRow_SeqSBAIJ, 1222 MatMult_SeqSBAIJ_N, 1223 /* 4*/ MatMultAdd_SeqSBAIJ_N, 1224 MatMult_SeqSBAIJ_N, /* transpose versions are same as non-transpose versions */ 1225 MatMultAdd_SeqSBAIJ_N, 1226 MatSolve_SeqSBAIJ_N, 1227 0, 1228 0, 1229 /*10*/ 0, 1230 0, 1231 MatCholeskyFactor_SeqSBAIJ, 1232 MatRelax_SeqSBAIJ, 1233 MatTranspose_SeqSBAIJ, 1234 /*15*/ MatGetInfo_SeqSBAIJ, 1235 MatEqual_SeqSBAIJ, 1236 MatGetDiagonal_SeqSBAIJ, 1237 MatDiagonalScale_SeqSBAIJ, 1238 MatNorm_SeqSBAIJ, 1239 /*20*/ 0, 1240 MatAssemblyEnd_SeqSBAIJ, 1241 0, 1242 MatSetOption_SeqSBAIJ, 1243 MatZeroEntries_SeqSBAIJ, 1244 /*25*/ 0, 1245 0, 1246 0, 1247 MatCholeskyFactorSymbolic_SeqSBAIJ, 1248 MatCholeskyFactorNumeric_SeqSBAIJ_N, 1249 /*30*/ MatSetUpPreallocation_SeqSBAIJ, 1250 0, 1251 MatICCFactorSymbolic_SeqSBAIJ, 1252 MatGetArray_SeqSBAIJ, 1253 MatRestoreArray_SeqSBAIJ, 1254 /*35*/ MatDuplicate_SeqSBAIJ, 1255 MatForwardSolve_SeqSBAIJ_N, 1256 MatBackwardSolve_SeqSBAIJ_N, 1257 0, 1258 MatICCFactor_SeqSBAIJ, 1259 /*40*/ MatAXPY_SeqSBAIJ, 1260 MatGetSubMatrices_SeqSBAIJ, 1261 MatIncreaseOverlap_SeqSBAIJ, 1262 MatGetValues_SeqSBAIJ, 1263 MatCopy_SeqSBAIJ, 1264 /*45*/ 0, 1265 MatScale_SeqSBAIJ, 1266 0, 1267 0, 1268 0, 1269 /*50*/ 0, 1270 MatGetRowIJ_SeqSBAIJ, 1271 MatRestoreRowIJ_SeqSBAIJ, 1272 0, 1273 0, 1274 /*55*/ 0, 1275 0, 1276 0, 1277 0, 1278 MatSetValuesBlocked_SeqSBAIJ, 1279 /*60*/ MatGetSubMatrix_SeqSBAIJ, 1280 0, 1281 0, 1282 0, 1283 0, 1284 /*65*/ 0, 1285 0, 1286 0, 1287 0, 1288 0, 1289 /*70*/ MatGetRowMaxAbs_SeqSBAIJ, 1290 0, 1291 0, 1292 0, 1293 0, 1294 /*75*/ 0, 1295 0, 1296 0, 1297 0, 1298 0, 1299 /*80*/ 0, 1300 0, 1301 0, 1302 #if !defined(PETSC_USE_COMPLEX) 1303 MatGetInertia_SeqSBAIJ, 1304 #else 1305 0, 1306 #endif 1307 MatLoad_SeqSBAIJ, 1308 /*85*/ MatIsSymmetric_SeqSBAIJ, 1309 MatIsHermitian_SeqSBAIJ, 1310 MatIsStructurallySymmetric_SeqSBAIJ, 1311 0, 1312 0, 1313 /*90*/ 0, 1314 0, 1315 0, 1316 0, 1317 0, 1318 /*95*/ 0, 1319 0, 1320 0, 1321 0, 1322 0, 1323 /*100*/0, 1324 0, 1325 0, 1326 0, 1327 0, 1328 /*105*/0, 1329 MatRealPart_SeqSBAIJ, 1330 MatImaginaryPart_SeqSBAIJ, 1331 MatGetRowUpperTriangular_SeqSBAIJ, 1332 MatRestoreRowUpperTriangular_SeqSBAIJ 1333 }; 1334 1335 EXTERN_C_BEGIN 1336 #undef __FUNCT__ 1337 #define __FUNCT__ "MatStoreValues_SeqSBAIJ" 1338 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqSBAIJ(Mat mat) 1339 { 1340 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1341 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 1342 PetscErrorCode ierr; 1343 1344 PetscFunctionBegin; 1345 if (aij->nonew != 1) { 1346 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1347 } 1348 1349 /* allocate space for values if not already there */ 1350 if (!aij->saved_values) { 1351 ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 1352 } 1353 1354 /* copy values over */ 1355 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1356 PetscFunctionReturn(0); 1357 } 1358 EXTERN_C_END 1359 1360 EXTERN_C_BEGIN 1361 #undef __FUNCT__ 1362 #define __FUNCT__ "MatRetrieveValues_SeqSBAIJ" 1363 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqSBAIJ(Mat mat) 1364 { 1365 Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)mat->data; 1366 PetscErrorCode ierr; 1367 PetscInt nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2; 1368 1369 PetscFunctionBegin; 1370 if (aij->nonew != 1) { 1371 SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1372 } 1373 if (!aij->saved_values) { 1374 SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 1375 } 1376 1377 /* copy values over */ 1378 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 1379 PetscFunctionReturn(0); 1380 } 1381 EXTERN_C_END 1382 1383 EXTERN_C_BEGIN 1384 #undef __FUNCT__ 1385 #define __FUNCT__ "MatSeqSBAIJSetPreallocation_SeqSBAIJ" 1386 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation_SeqSBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 1387 { 1388 Mat_SeqSBAIJ *b = (Mat_SeqSBAIJ*)B->data; 1389 PetscErrorCode ierr; 1390 PetscInt i,mbs,bs2; 1391 PetscTruth skipallocation = PETSC_FALSE,flg; 1392 1393 PetscFunctionBegin; 1394 B->preallocated = PETSC_TRUE; 1395 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1396 B->rmap.bs = B->cmap.bs = bs; 1397 ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr); 1398 ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr); 1399 1400 mbs = B->rmap.N/bs; 1401 bs2 = bs*bs; 1402 1403 if (mbs*bs != B->rmap.N) { 1404 SETERRQ(PETSC_ERR_ARG_SIZ,"Number rows, cols must be divisible by blocksize"); 1405 } 1406 1407 if (nz == MAT_SKIP_ALLOCATION) { 1408 skipallocation = PETSC_TRUE; 1409 nz = 0; 1410 } 1411 1412 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 3; 1413 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 1414 if (nnz) { 1415 for (i=0; i<mbs; i++) { 1416 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 1417 if (nnz[i] > mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],mbs); 1418 } 1419 } 1420 1421 ierr = PetscOptionsHasName(B->prefix,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1422 if (!flg) { 1423 switch (bs) { 1424 case 1: 1425 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1; 1426 B->ops->solve = MatSolve_SeqSBAIJ_1; 1427 B->ops->solves = MatSolves_SeqSBAIJ_1; 1428 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1429 B->ops->mult = MatMult_SeqSBAIJ_1; 1430 B->ops->multadd = MatMultAdd_SeqSBAIJ_1; 1431 B->ops->multtranspose = MatMult_SeqSBAIJ_1; 1432 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_1; 1433 break; 1434 case 2: 1435 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_2; 1436 B->ops->solve = MatSolve_SeqSBAIJ_2; 1437 B->ops->solvetranspose = MatSolve_SeqSBAIJ_2; 1438 B->ops->mult = MatMult_SeqSBAIJ_2; 1439 B->ops->multadd = MatMultAdd_SeqSBAIJ_2; 1440 B->ops->multtranspose = MatMult_SeqSBAIJ_2; 1441 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_2; 1442 break; 1443 case 3: 1444 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_3; 1445 B->ops->solve = MatSolve_SeqSBAIJ_3; 1446 B->ops->solvetranspose = MatSolve_SeqSBAIJ_3; 1447 B->ops->mult = MatMult_SeqSBAIJ_3; 1448 B->ops->multadd = MatMultAdd_SeqSBAIJ_3; 1449 B->ops->multtranspose = MatMult_SeqSBAIJ_3; 1450 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_3; 1451 break; 1452 case 4: 1453 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_4; 1454 B->ops->solve = MatSolve_SeqSBAIJ_4; 1455 B->ops->solvetranspose = MatSolve_SeqSBAIJ_4; 1456 B->ops->mult = MatMult_SeqSBAIJ_4; 1457 B->ops->multadd = MatMultAdd_SeqSBAIJ_4; 1458 B->ops->multtranspose = MatMult_SeqSBAIJ_4; 1459 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_4; 1460 break; 1461 case 5: 1462 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_5; 1463 B->ops->solve = MatSolve_SeqSBAIJ_5; 1464 B->ops->solvetranspose = MatSolve_SeqSBAIJ_5; 1465 B->ops->mult = MatMult_SeqSBAIJ_5; 1466 B->ops->multadd = MatMultAdd_SeqSBAIJ_5; 1467 B->ops->multtranspose = MatMult_SeqSBAIJ_5; 1468 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_5; 1469 break; 1470 case 6: 1471 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_6; 1472 B->ops->solve = MatSolve_SeqSBAIJ_6; 1473 B->ops->solvetranspose = MatSolve_SeqSBAIJ_6; 1474 B->ops->mult = MatMult_SeqSBAIJ_6; 1475 B->ops->multadd = MatMultAdd_SeqSBAIJ_6; 1476 B->ops->multtranspose = MatMult_SeqSBAIJ_6; 1477 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_6; 1478 break; 1479 case 7: 1480 B->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_7; 1481 B->ops->solve = MatSolve_SeqSBAIJ_7; 1482 B->ops->solvetranspose = MatSolve_SeqSBAIJ_7; 1483 B->ops->mult = MatMult_SeqSBAIJ_7; 1484 B->ops->multadd = MatMultAdd_SeqSBAIJ_7; 1485 B->ops->multtranspose = MatMult_SeqSBAIJ_7; 1486 B->ops->multtransposeadd = MatMultAdd_SeqSBAIJ_7; 1487 break; 1488 } 1489 } 1490 1491 b->mbs = mbs; 1492 b->nbs = mbs; 1493 if (!skipallocation) { 1494 /* b->ilen will count nonzeros in each block row so far. */ 1495 ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr); 1496 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1497 if (!nnz) { 1498 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1499 else if (nz <= 0) nz = 1; 1500 for (i=0; i<mbs; i++) { 1501 b->imax[i] = nz; 1502 } 1503 nz = nz*mbs; /* total nz */ 1504 } else { 1505 nz = 0; 1506 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1507 } 1508 /* nz=(nz+mbs)/2; */ /* total diagonal and superdiagonal nonzero blocks */ 1509 1510 /* allocate the matrix space */ 1511 ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr); 1512 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1513 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1514 b->singlemalloc = PETSC_TRUE; 1515 1516 /* pointer to beginning of each row */ 1517 b->i[0] = 0; 1518 for (i=1; i<mbs+1; i++) { 1519 b->i[i] = b->i[i-1] + b->imax[i-1]; 1520 } 1521 b->free_a = PETSC_TRUE; 1522 b->free_ij = PETSC_TRUE; 1523 } else { 1524 b->free_a = PETSC_FALSE; 1525 b->free_ij = PETSC_FALSE; 1526 } 1527 1528 B->rmap.bs = bs; 1529 b->bs2 = bs2; 1530 b->nz = 0; 1531 b->maxnz = nz*bs2; 1532 1533 b->inew = 0; 1534 b->jnew = 0; 1535 b->anew = 0; 1536 b->a2anew = 0; 1537 b->permute = PETSC_FALSE; 1538 PetscFunctionReturn(0); 1539 } 1540 EXTERN_C_END 1541 1542 EXTERN_C_BEGIN 1543 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 1544 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqSBAIJ_SeqBAIJ(Mat, MatType,MatReuse,Mat*); 1545 EXTERN_C_END 1546 1547 /*MC 1548 MATSEQSBAIJ - MATSEQSBAIJ = "seqsbaij" - A matrix type to be used for sequential symmetric block sparse matrices, 1549 based on block compressed sparse row format. Only the upper triangular portion of the matrix is stored. 1550 1551 Options Database Keys: 1552 . -mat_type seqsbaij - sets the matrix type to "seqsbaij" during a call to MatSetFromOptions() 1553 1554 Level: beginner 1555 1556 .seealso: MatCreateSeqSBAIJ 1557 M*/ 1558 1559 EXTERN_C_BEGIN 1560 #undef __FUNCT__ 1561 #define __FUNCT__ "MatCreate_SeqSBAIJ" 1562 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqSBAIJ(Mat B) 1563 { 1564 Mat_SeqSBAIJ *b; 1565 PetscErrorCode ierr; 1566 PetscMPIInt size; 1567 PetscTruth flg; 1568 1569 PetscFunctionBegin; 1570 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1571 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1572 1573 ierr = PetscNew(Mat_SeqSBAIJ,&b);CHKERRQ(ierr); 1574 B->data = (void*)b; 1575 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1576 B->ops->destroy = MatDestroy_SeqSBAIJ; 1577 B->ops->view = MatView_SeqSBAIJ; 1578 B->factor = 0; 1579 B->mapping = 0; 1580 b->row = 0; 1581 b->icol = 0; 1582 b->reallocs = 0; 1583 b->saved_values = 0; 1584 1585 1586 b->sorted = PETSC_FALSE; 1587 b->roworiented = PETSC_TRUE; 1588 b->nonew = 0; 1589 b->diag = 0; 1590 b->solve_work = 0; 1591 b->mult_work = 0; 1592 B->spptr = 0; 1593 b->keepzeroedrows = PETSC_FALSE; 1594 b->xtoy = 0; 1595 b->XtoY = 0; 1596 1597 b->inew = 0; 1598 b->jnew = 0; 1599 b->anew = 0; 1600 b->a2anew = 0; 1601 b->permute = PETSC_FALSE; 1602 1603 b->ignore_ltriangular = PETSC_FALSE; 1604 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_ignore_lower_triangular",&flg);CHKERRQ(ierr); 1605 if (flg) b->ignore_ltriangular = PETSC_TRUE; 1606 1607 b->getrow_utriangular = PETSC_FALSE; 1608 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_getrow_uppertriangular",&flg);CHKERRQ(ierr); 1609 if (flg) b->getrow_utriangular = PETSC_TRUE; 1610 1611 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1612 "MatStoreValues_SeqSBAIJ", 1613 MatStoreValues_SeqSBAIJ);CHKERRQ(ierr); 1614 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1615 "MatRetrieveValues_SeqSBAIJ", 1616 (void*)MatRetrieveValues_SeqSBAIJ);CHKERRQ(ierr); 1617 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetColumnIndices_C", 1618 "MatSeqSBAIJSetColumnIndices_SeqSBAIJ", 1619 MatSeqSBAIJSetColumnIndices_SeqSBAIJ);CHKERRQ(ierr); 1620 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqaij_C", 1621 "MatConvert_SeqSBAIJ_SeqAIJ", 1622 MatConvert_SeqSBAIJ_SeqAIJ);CHKERRQ(ierr); 1623 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_seqbaij_C", 1624 "MatConvert_SeqSBAIJ_SeqBAIJ", 1625 MatConvert_SeqSBAIJ_SeqBAIJ);CHKERRQ(ierr); 1626 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqSBAIJSetPreallocation_C", 1627 "MatSeqSBAIJSetPreallocation_SeqSBAIJ", 1628 MatSeqSBAIJSetPreallocation_SeqSBAIJ);CHKERRQ(ierr); 1629 1630 B->symmetric = PETSC_TRUE; 1631 B->structurally_symmetric = PETSC_TRUE; 1632 B->symmetric_set = PETSC_TRUE; 1633 B->structurally_symmetric_set = PETSC_TRUE; 1634 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQSBAIJ);CHKERRQ(ierr); 1635 PetscFunctionReturn(0); 1636 } 1637 EXTERN_C_END 1638 1639 #undef __FUNCT__ 1640 #define __FUNCT__ "MatSeqSBAIJSetPreallocation" 1641 /*@C 1642 MatSeqSBAIJSetPreallocation - Creates a sparse symmetric matrix in block AIJ (block 1643 compressed row) format. For good matrix assembly performance the 1644 user should preallocate the matrix storage by setting the parameter nz 1645 (or the array nnz). By setting these parameters accurately, performance 1646 during matrix assembly can be increased by more than a factor of 50. 1647 1648 Collective on Mat 1649 1650 Input Parameters: 1651 + A - the symmetric matrix 1652 . bs - size of block 1653 . nz - number of block nonzeros per block row (same for all rows) 1654 - nnz - array containing the number of block nonzeros in the upper triangular plus 1655 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1656 1657 Options Database Keys: 1658 . -mat_no_unroll - uses code that does not unroll the loops in the 1659 block calculations (much slower) 1660 . -mat_block_size - size of the blocks to use 1661 1662 Level: intermediate 1663 1664 Notes: 1665 Specify the preallocated storage with either nz or nnz (not both). 1666 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1667 allocation. For additional details, see the users manual chapter on 1668 matrices. 1669 1670 If the nnz parameter is given then the nz parameter is ignored 1671 1672 1673 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1674 @*/ 1675 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqSBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 1676 { 1677 PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]); 1678 1679 PetscFunctionBegin; 1680 ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqSBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr); 1681 if (f) { 1682 ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr); 1683 } 1684 PetscFunctionReturn(0); 1685 } 1686 1687 #undef __FUNCT__ 1688 #define __FUNCT__ "MatCreateSeqSBAIJ" 1689 /*@C 1690 MatCreateSeqSBAIJ - Creates a sparse symmetric matrix in block AIJ (block 1691 compressed row) format. For good matrix assembly performance the 1692 user should preallocate the matrix storage by setting the parameter nz 1693 (or the array nnz). By setting these parameters accurately, performance 1694 during matrix assembly can be increased by more than a factor of 50. 1695 1696 Collective on MPI_Comm 1697 1698 Input Parameters: 1699 + comm - MPI communicator, set to PETSC_COMM_SELF 1700 . bs - size of block 1701 . m - number of rows, or number of columns 1702 . nz - number of block nonzeros per block row (same for all rows) 1703 - nnz - array containing the number of block nonzeros in the upper triangular plus 1704 diagonal portion of each block (possibly different for each block row) or PETSC_NULL 1705 1706 Output Parameter: 1707 . A - the symmetric matrix 1708 1709 Options Database Keys: 1710 . -mat_no_unroll - uses code that does not unroll the loops in the 1711 block calculations (much slower) 1712 . -mat_block_size - size of the blocks to use 1713 1714 Level: intermediate 1715 1716 Notes: 1717 1718 Specify the preallocated storage with either nz or nnz (not both). 1719 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1720 allocation. For additional details, see the users manual chapter on 1721 matrices. 1722 1723 If the nnz parameter is given then the nz parameter is ignored 1724 1725 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPISBAIJ() 1726 @*/ 1727 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 1728 { 1729 PetscErrorCode ierr; 1730 1731 PetscFunctionBegin; 1732 ierr = MatCreate(comm,A);CHKERRQ(ierr); 1733 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 1734 ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr); 1735 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 1736 PetscFunctionReturn(0); 1737 } 1738 1739 #undef __FUNCT__ 1740 #define __FUNCT__ "MatDuplicate_SeqSBAIJ" 1741 PetscErrorCode MatDuplicate_SeqSBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1742 { 1743 Mat C; 1744 Mat_SeqSBAIJ *c,*a = (Mat_SeqSBAIJ*)A->data; 1745 PetscErrorCode ierr; 1746 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 =a->bs2; 1747 1748 PetscFunctionBegin; 1749 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1750 1751 *B = 0; 1752 ierr = MatCreate(A->comm,&C);CHKERRQ(ierr); 1753 ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr); 1754 ierr = MatSetType(C,A->type_name);CHKERRQ(ierr); 1755 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1756 c = (Mat_SeqSBAIJ*)C->data; 1757 1758 C->preallocated = PETSC_TRUE; 1759 C->factor = A->factor; 1760 c->row = 0; 1761 c->icol = 0; 1762 c->saved_values = 0; 1763 c->keepzeroedrows = a->keepzeroedrows; 1764 C->assembled = PETSC_TRUE; 1765 1766 ierr = PetscMapCopy(A->comm,&A->rmap,&C->rmap);CHKERRQ(ierr); 1767 ierr = PetscMapCopy(A->comm,&A->cmap,&C->cmap);CHKERRQ(ierr); 1768 c->bs2 = a->bs2; 1769 c->mbs = a->mbs; 1770 c->nbs = a->nbs; 1771 1772 ierr = PetscMalloc2((mbs+1),PetscInt,&c->imax,(mbs+1),PetscInt,&c->ilen);CHKERRQ(ierr); 1773 for (i=0; i<mbs; i++) { 1774 c->imax[i] = a->imax[i]; 1775 c->ilen[i] = a->ilen[i]; 1776 } 1777 1778 /* allocate the matrix space */ 1779 ierr = PetscMalloc3(bs2*nz,MatScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr); 1780 c->singlemalloc = PETSC_TRUE; 1781 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1782 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt) + nz*(bs2*sizeof(MatScalar) + sizeof(PetscInt)));CHKERRQ(ierr); 1783 if (mbs > 0) { 1784 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 1785 if (cpvalues == MAT_COPY_VALUES) { 1786 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1787 } else { 1788 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1789 } 1790 } 1791 1792 c->sorted = a->sorted; 1793 c->roworiented = a->roworiented; 1794 c->nonew = a->nonew; 1795 1796 if (a->diag) { 1797 ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr); 1798 ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1799 for (i=0; i<mbs; i++) { 1800 c->diag[i] = a->diag[i]; 1801 } 1802 } else c->diag = 0; 1803 c->nz = a->nz; 1804 c->maxnz = a->maxnz; 1805 c->solve_work = 0; 1806 c->mult_work = 0; 1807 c->free_a = PETSC_TRUE; 1808 c->free_ij = PETSC_TRUE; 1809 *B = C; 1810 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1811 PetscFunctionReturn(0); 1812 } 1813 1814 #undef __FUNCT__ 1815 #define __FUNCT__ "MatLoad_SeqSBAIJ" 1816 PetscErrorCode MatLoad_SeqSBAIJ(PetscViewer viewer, MatType type,Mat *A) 1817 { 1818 Mat_SeqSBAIJ *a; 1819 Mat B; 1820 PetscErrorCode ierr; 1821 int fd; 1822 PetscMPIInt size; 1823 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs=1; 1824 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*s_browlengths,maskcount; 1825 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows; 1826 PetscInt *masked,nmask,tmp,bs2,ishift; 1827 PetscScalar *aa; 1828 MPI_Comm comm = ((PetscObject)viewer)->comm; 1829 1830 PetscFunctionBegin; 1831 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1832 bs2 = bs*bs; 1833 1834 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1835 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1836 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1837 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1838 if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1839 M = header[1]; N = header[2]; nz = header[3]; 1840 1841 if (header[3] < 0) { 1842 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqSBAIJ"); 1843 } 1844 1845 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1846 1847 /* 1848 This code adds extra rows to make sure the number of rows is 1849 divisible by the blocksize 1850 */ 1851 mbs = M/bs; 1852 extra_rows = bs - M + bs*(mbs); 1853 if (extra_rows == bs) extra_rows = 0; 1854 else mbs++; 1855 if (extra_rows) { 1856 ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 1857 } 1858 1859 /* read in row lengths */ 1860 ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr); 1861 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1862 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1863 1864 /* read in column indices */ 1865 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr); 1866 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1867 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1868 1869 /* loop over row lengths determining block row lengths */ 1870 ierr = PetscMalloc(mbs*sizeof(PetscInt),&s_browlengths);CHKERRQ(ierr); 1871 ierr = PetscMemzero(s_browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1872 ierr = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr); 1873 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 1874 masked = mask + mbs; 1875 rowcount = 0; nzcount = 0; 1876 for (i=0; i<mbs; i++) { 1877 nmask = 0; 1878 for (j=0; j<bs; j++) { 1879 kmax = rowlengths[rowcount]; 1880 for (k=0; k<kmax; k++) { 1881 tmp = jj[nzcount++]/bs; /* block col. index */ 1882 if (!mask[tmp] && tmp >= i) {masked[nmask++] = tmp; mask[tmp] = 1;} 1883 } 1884 rowcount++; 1885 } 1886 s_browlengths[i] += nmask; 1887 1888 /* zero out the mask elements we set */ 1889 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1890 } 1891 1892 /* create our matrix */ 1893 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 1894 ierr = MatSetSizes(B,M+extra_rows,N+extra_rows,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 1895 ierr = MatSetType(B,type);CHKERRQ(ierr); 1896 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(B,bs,0,s_browlengths);CHKERRQ(ierr); 1897 a = (Mat_SeqSBAIJ*)B->data; 1898 1899 /* set matrix "i" values */ 1900 a->i[0] = 0; 1901 for (i=1; i<= mbs; i++) { 1902 a->i[i] = a->i[i-1] + s_browlengths[i-1]; 1903 a->ilen[i-1] = s_browlengths[i-1]; 1904 } 1905 a->nz = a->i[mbs]; 1906 1907 /* read in nonzero values */ 1908 ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 1909 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1910 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1911 1912 /* set "a" and "j" values into matrix */ 1913 nzcount = 0; jcount = 0; 1914 for (i=0; i<mbs; i++) { 1915 nzcountb = nzcount; 1916 nmask = 0; 1917 for (j=0; j<bs; j++) { 1918 kmax = rowlengths[i*bs+j]; 1919 for (k=0; k<kmax; k++) { 1920 tmp = jj[nzcount++]/bs; /* block col. index */ 1921 if (!mask[tmp] && tmp >= i) { masked[nmask++] = tmp; mask[tmp] = 1;} 1922 } 1923 } 1924 /* sort the masked values */ 1925 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1926 1927 /* set "j" values into matrix */ 1928 maskcount = 1; 1929 for (j=0; j<nmask; j++) { 1930 a->j[jcount++] = masked[j]; 1931 mask[masked[j]] = maskcount++; 1932 } 1933 1934 /* set "a" values into matrix */ 1935 ishift = bs2*a->i[i]; 1936 for (j=0; j<bs; j++) { 1937 kmax = rowlengths[i*bs+j]; 1938 for (k=0; k<kmax; k++) { 1939 tmp = jj[nzcountb]/bs ; /* block col. index */ 1940 if (tmp >= i){ 1941 block = mask[tmp] - 1; 1942 point = jj[nzcountb] - bs*tmp; 1943 idx = ishift + bs2*block + j + bs*point; 1944 a->a[idx] = aa[nzcountb]; 1945 } 1946 nzcountb++; 1947 } 1948 } 1949 /* zero out the mask elements we set */ 1950 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1951 } 1952 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1953 1954 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1955 ierr = PetscFree(s_browlengths);CHKERRQ(ierr); 1956 ierr = PetscFree(aa);CHKERRQ(ierr); 1957 ierr = PetscFree(jj);CHKERRQ(ierr); 1958 ierr = PetscFree(mask);CHKERRQ(ierr); 1959 1960 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1961 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1962 ierr = MatView_Private(B);CHKERRQ(ierr); 1963 *A = B; 1964 PetscFunctionReturn(0); 1965 } 1966 1967 #undef __FUNCT__ 1968 #define __FUNCT__ "MatRelax_SeqSBAIJ" 1969 PetscErrorCode MatRelax_SeqSBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1970 { 1971 Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data; 1972 MatScalar *aa=a->a,*v,*v1; 1973 PetscScalar *x,*b,*t,sum,d; 1974 PetscErrorCode ierr; 1975 PetscInt m=a->mbs,bs=A->rmap.bs,*ai=a->i,*aj=a->j; 1976 PetscInt nz,nz1,*vj,*vj1,i; 1977 1978 PetscFunctionBegin; 1979 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat"); 1980 1981 its = its*lits; 1982 if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 1983 1984 if (bs > 1) 1985 SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented"); 1986 1987 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1988 if (xx != bb) { 1989 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1990 } else { 1991 b = x; 1992 } 1993 1994 if (!a->relax_work) { 1995 ierr = PetscMalloc(m*sizeof(PetscScalar),&a->relax_work);CHKERRQ(ierr); 1996 } 1997 t = a->relax_work; 1998 1999 if (flag & SOR_ZERO_INITIAL_GUESS) { 2000 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2001 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 2002 2003 for (i=0; i<m; i++){ 2004 d = *(aa + ai[i]); /* diag[i] */ 2005 v = aa + ai[i] + 1; 2006 vj = aj + ai[i] + 1; 2007 nz = ai[i+1] - ai[i] - 1; 2008 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2009 x[i] = omega*t[i]/d; 2010 while (nz--) t[*vj++] -= x[i]*(*v++); /* update rhs */ 2011 } 2012 } 2013 2014 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2015 if (!(flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP)){ 2016 t = b; 2017 } 2018 2019 for (i=m-1; i>=0; i--){ 2020 d = *(aa + ai[i]); 2021 v = aa + ai[i] + 1; 2022 vj = aj + ai[i] + 1; 2023 nz = ai[i+1] - ai[i] - 1; 2024 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2025 sum = t[i]; 2026 while (nz--) sum -= x[*vj++]*(*v++); 2027 x[i] = (1-omega)*x[i] + omega*sum/d; 2028 } 2029 t = a->relax_work; 2030 } 2031 its--; 2032 } 2033 2034 while (its--) { 2035 /* 2036 forward sweep: 2037 for i=0,...,m-1: 2038 sum[i] = (b[i] - U(i,:)x )/d[i]; 2039 x[i] = (1-omega)x[i] + omega*sum[i]; 2040 b = b - x[i]*U^T(i,:); 2041 2042 */ 2043 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){ 2044 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 2045 2046 for (i=0; i<m; i++){ 2047 d = *(aa + ai[i]); /* diag[i] */ 2048 v = aa + ai[i] + 1; v1=v; 2049 vj = aj + ai[i] + 1; vj1=vj; 2050 nz = ai[i+1] - ai[i] - 1; nz1=nz; 2051 sum = t[i]; 2052 ierr = PetscLogFlops(4*nz-2);CHKERRQ(ierr); 2053 while (nz1--) sum -= (*v1++)*x[*vj1++]; 2054 x[i] = (1-omega)*x[i] + omega*sum/d; 2055 while (nz--) t[*vj++] -= x[i]*(*v++); 2056 } 2057 } 2058 2059 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){ 2060 /* 2061 backward sweep: 2062 b = b - x[i]*U^T(i,:), i=0,...,n-2 2063 for i=m-1,...,0: 2064 sum[i] = (b[i] - U(i,:)x )/d[i]; 2065 x[i] = (1-omega)x[i] + omega*sum[i]; 2066 */ 2067 /* if there was a forward sweep done above then I thing the next two for loops are not needed */ 2068 ierr = PetscMemcpy(t,b,m*sizeof(PetscScalar));CHKERRQ(ierr); 2069 2070 for (i=0; i<m-1; i++){ /* update rhs */ 2071 v = aa + ai[i] + 1; 2072 vj = aj + ai[i] + 1; 2073 nz = ai[i+1] - ai[i] - 1; 2074 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2075 while (nz--) t[*vj++] -= x[i]*(*v++); 2076 } 2077 for (i=m-1; i>=0; i--){ 2078 d = *(aa + ai[i]); 2079 v = aa + ai[i] + 1; 2080 vj = aj + ai[i] + 1; 2081 nz = ai[i+1] - ai[i] - 1; 2082 ierr = PetscLogFlops(2*nz-1);CHKERRQ(ierr); 2083 sum = t[i]; 2084 while (nz--) sum -= x[*vj++]*(*v++); 2085 x[i] = (1-omega)*x[i] + omega*sum/d; 2086 } 2087 } 2088 } 2089 2090 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2091 if (bb != xx) { 2092 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 2093 } 2094 PetscFunctionReturn(0); 2095 } 2096 2097 #undef __FUNCT__ 2098 #define __FUNCT__ "MatCreateSeqSBAIJWithArrays" 2099 /*@ 2100 MatCreateSeqSBAIJWithArrays - Creates an sequential SBAIJ matrix using matrix elements 2101 (upper triangular entries in CSR format) provided by the user. 2102 2103 Collective on MPI_Comm 2104 2105 Input Parameters: 2106 + comm - must be an MPI communicator of size 1 2107 . bs - size of block 2108 . m - number of rows 2109 . n - number of columns 2110 . i - row indices 2111 . j - column indices 2112 - a - matrix values 2113 2114 Output Parameter: 2115 . mat - the matrix 2116 2117 Level: intermediate 2118 2119 Notes: 2120 The i, j, and a arrays are not copied by this routine, the user must free these arrays 2121 once the matrix is destroyed 2122 2123 You cannot set new nonzero locations into this matrix, that will generate an error. 2124 2125 The i and j indices are 0 based 2126 2127 .seealso: MatCreate(), MatCreateMPISBAIJ(), MatCreateSeqSBAIJ() 2128 2129 @*/ 2130 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqSBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat) 2131 { 2132 PetscErrorCode ierr; 2133 PetscInt ii; 2134 Mat_SeqSBAIJ *sbaij; 2135 2136 PetscFunctionBegin; 2137 if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 2138 if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 2139 2140 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 2141 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 2142 ierr = MatSetType(*mat,MATSEQSBAIJ);CHKERRQ(ierr); 2143 ierr = MatSeqSBAIJSetPreallocation_SeqSBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 2144 sbaij = (Mat_SeqSBAIJ*)(*mat)->data; 2145 ierr = PetscMalloc2(m,PetscInt,&sbaij->imax,m,PetscInt,&sbaij->ilen);CHKERRQ(ierr); 2146 2147 sbaij->i = i; 2148 sbaij->j = j; 2149 sbaij->a = a; 2150 sbaij->singlemalloc = PETSC_FALSE; 2151 sbaij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 2152 sbaij->free_a = PETSC_FALSE; 2153 sbaij->free_ij = PETSC_FALSE; 2154 2155 for (ii=0; ii<m; ii++) { 2156 sbaij->ilen[ii] = sbaij->imax[ii] = i[ii+1] - i[ii]; 2157 #if defined(PETSC_USE_DEBUG) 2158 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 2159 #endif 2160 } 2161 #if defined(PETSC_USE_DEBUG) 2162 for (ii=0; ii<sbaij->i[m]; ii++) { 2163 if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 2164 if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 2165 } 2166 #endif 2167 2168 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2169 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2170 PetscFunctionReturn(0); 2171 } 2172 2173 2174 2175 2176 2177