1 /*$Id: baij.c,v 1.235 2001/07/12 01:48:59 buschelm Exp buschelm $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "petscsys.h" /*I "petscmat.h" I*/ 8 #include "src/mat/impls/baij/seq/baij.h" 9 #include "src/vec/vecimpl.h" 10 #include "src/inline/spops.h" 11 12 /* UGLY, ugly, ugly 13 When MatScalar == Scalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 14 not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 15 inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 16 converts the entries into single precision and then calls ..._MatScalar() to put them 17 into the single precision data structures. 18 */ 19 #if defined(PETSC_USE_MAT_SINGLE) 20 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 21 #else 22 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 23 #endif 24 25 #define CHUNKSIZE 10 26 27 /* 28 Checks for missing diagonals 29 */ 30 #undef __FUNCT__ 31 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 32 int MatMissingDiagonal_SeqBAIJ(Mat A) 33 { 34 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 35 int *diag,*jj = a->j,i,ierr; 36 37 PetscFunctionBegin; 38 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 39 diag = a->diag; 40 for (i=0; i<a->mbs; i++) { 41 if (jj[diag[i]] != i) { 42 SETERRQ1(1,"Matrix is missing diagonal number %d",i); 43 } 44 } 45 PetscFunctionReturn(0); 46 } 47 48 #undef __FUNCT__ 49 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 50 int MatMarkDiagonal_SeqBAIJ(Mat A) 51 { 52 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 53 int i,j,*diag,m = a->mbs,ierr; 54 55 PetscFunctionBegin; 56 if (a->diag) PetscFunctionReturn(0); 57 58 ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 59 PetscLogObjectMemory(A,(m+1)*sizeof(int)); 60 for (i=0; i<m; i++) { 61 diag[i] = a->i[i+1]; 62 for (j=a->i[i]; j<a->i[i+1]; j++) { 63 if (a->j[j] == i) { 64 diag[i] = j; 65 break; 66 } 67 } 68 } 69 a->diag = diag; 70 PetscFunctionReturn(0); 71 } 72 73 74 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 75 76 #undef __FUNCT__ 77 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 78 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 79 { 80 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 81 int ierr,n = a->mbs,i; 82 83 PetscFunctionBegin; 84 *nn = n; 85 if (!ia) PetscFunctionReturn(0); 86 if (symmetric) { 87 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 88 } else if (oshift == 1) { 89 /* temporarily add 1 to i and j indices */ 90 int nz = a->i[n]; 91 for (i=0; i<nz; i++) a->j[i]++; 92 for (i=0; i<n+1; i++) a->i[i]++; 93 *ia = a->i; *ja = a->j; 94 } else { 95 *ia = a->i; *ja = a->j; 96 } 97 98 PetscFunctionReturn(0); 99 } 100 101 #undef __FUNCT__ 102 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 103 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 104 { 105 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 106 int i,n = a->mbs,ierr; 107 108 PetscFunctionBegin; 109 if (!ia) PetscFunctionReturn(0); 110 if (symmetric) { 111 ierr = PetscFree(*ia);CHKERRQ(ierr); 112 ierr = PetscFree(*ja);CHKERRQ(ierr); 113 } else if (oshift == 1) { 114 int nz = a->i[n]-1; 115 for (i=0; i<nz; i++) a->j[i]--; 116 for (i=0; i<n+1; i++) a->i[i]--; 117 } 118 PetscFunctionReturn(0); 119 } 120 121 #undef __FUNCT__ 122 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 123 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 124 { 125 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 126 127 PetscFunctionBegin; 128 *bs = baij->bs; 129 PetscFunctionReturn(0); 130 } 131 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "MatDestroy_SeqBAIJ" 135 int MatDestroy_SeqBAIJ(Mat A) 136 { 137 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 138 int ierr; 139 140 PetscFunctionBegin; 141 #if defined(PETSC_USE_LOG) 142 PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 143 #endif 144 ierr = PetscFree(a->a);CHKERRQ(ierr); 145 if (!a->singlemalloc) { 146 ierr = PetscFree(a->i);CHKERRQ(ierr); 147 ierr = PetscFree(a->j);CHKERRQ(ierr); 148 } 149 if (a->row) { 150 ierr = ISDestroy(a->row);CHKERRQ(ierr); 151 } 152 if (a->col) { 153 ierr = ISDestroy(a->col);CHKERRQ(ierr); 154 } 155 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 156 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 157 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 158 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 159 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 160 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 161 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 162 #if defined(PETSC_USE_MAT_SINGLE) 163 if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 164 #endif 165 ierr = PetscFree(a);CHKERRQ(ierr); 166 PetscFunctionReturn(0); 167 } 168 169 #undef __FUNCT__ 170 #define __FUNCT__ "MatSetOption_SeqBAIJ" 171 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 172 { 173 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 174 175 PetscFunctionBegin; 176 switch (op) { 177 case MAT_ROW_ORIENTED: 178 a->roworiented = PETSC_TRUE; 179 break; 180 case MAT_COLUMN_ORIENTED: 181 a->roworiented = PETSC_FALSE; 182 break; 183 case MAT_COLUMNS_SORTED: 184 a->sorted = PETSC_TRUE; 185 break; 186 case MAT_COLUMNS_UNSORTED: 187 a->sorted = PETSC_FALSE; 188 break; 189 case MAT_KEEP_ZEROED_ROWS: 190 a->keepzeroedrows = PETSC_TRUE; 191 break; 192 case MAT_NO_NEW_NONZERO_LOCATIONS: 193 a->nonew = 1; 194 break; 195 case MAT_NEW_NONZERO_LOCATION_ERR: 196 a->nonew = -1; 197 break; 198 case MAT_NEW_NONZERO_ALLOCATION_ERR: 199 a->nonew = -2; 200 break; 201 case MAT_YES_NEW_NONZERO_LOCATIONS: 202 a->nonew = 0; 203 break; 204 case MAT_ROWS_SORTED: 205 case MAT_ROWS_UNSORTED: 206 case MAT_YES_NEW_DIAGONALS: 207 case MAT_IGNORE_OFF_PROC_ENTRIES: 208 case MAT_USE_HASH_TABLE: 209 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 210 break; 211 case MAT_NO_NEW_DIAGONALS: 212 SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 213 break; 214 case MAT_USE_SINGLE_PRECISION_SOLVES: 215 if (a->bs==4) { 216 PetscTruth sse_enabled,single_prec,flg; 217 int ierr; 218 219 ierr = PetscSSEIsEnabled(&sse_enabled);CHKERRQ(ierr); 220 ierr = PetscOptionsGetLogical(PETSC_NULL,"-mat_single_precision_solves",&single_prec,&flg);CHKERRQ(ierr); 221 if (sse_enabled && flg && single_prec) { 222 a->single_precision_solves = PETSC_TRUE; 223 A->ops->solve = MatSolve_SeqBAIJ_Update; 224 A->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 225 break; 226 } 227 PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 228 } 229 break; 230 default: 231 SETERRQ(PETSC_ERR_SUP,"unknown option"); 232 } 233 PetscFunctionReturn(0); 234 } 235 236 #undef __FUNCT__ 237 #define __FUNCT__ "MatGetOwnershipRange_SeqBAIJ" 238 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 239 { 240 PetscFunctionBegin; 241 if (m) *m = 0; 242 if (n) *n = A->m; 243 PetscFunctionReturn(0); 244 } 245 246 #undef __FUNCT__ 247 #define __FUNCT__ "MatGetRow_SeqBAIJ" 248 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 249 { 250 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 251 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 252 MatScalar *aa,*aa_i; 253 Scalar *v_i; 254 255 PetscFunctionBegin; 256 bs = a->bs; 257 ai = a->i; 258 aj = a->j; 259 aa = a->a; 260 bs2 = a->bs2; 261 262 if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 263 264 bn = row/bs; /* Block number */ 265 bp = row % bs; /* Block Position */ 266 M = ai[bn+1] - ai[bn]; 267 *nz = bs*M; 268 269 if (v) { 270 *v = 0; 271 if (*nz) { 272 ierr = PetscMalloc((*nz)*sizeof(Scalar),v);CHKERRQ(ierr); 273 for (i=0; i<M; i++) { /* for each block in the block row */ 274 v_i = *v + i*bs; 275 aa_i = aa + bs2*(ai[bn] + i); 276 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 277 } 278 } 279 } 280 281 if (idx) { 282 *idx = 0; 283 if (*nz) { 284 ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 285 for (i=0; i<M; i++) { /* for each block in the block row */ 286 idx_i = *idx + i*bs; 287 itmp = bs*aj[ai[bn] + i]; 288 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 289 } 290 } 291 } 292 PetscFunctionReturn(0); 293 } 294 295 #undef __FUNCT__ 296 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 297 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 298 { 299 int ierr; 300 301 PetscFunctionBegin; 302 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 303 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 304 PetscFunctionReturn(0); 305 } 306 307 #undef __FUNCT__ 308 #define __FUNCT__ "MatTranspose_SeqBAIJ" 309 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 310 { 311 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 312 Mat C; 313 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 314 int *rows,*cols,bs2=a->bs2; 315 Scalar *array; 316 317 PetscFunctionBegin; 318 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 319 ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 320 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 321 322 #if defined(PETSC_USE_MAT_SINGLE) 323 ierr = PetscMalloc(a->bs2*a->nz*sizeof(Scalar),&array);CHKERRQ(ierr); 324 for (i=0; i<a->bs2*a->nz; i++) array[i] = (Scalar)a->a[i]; 325 #else 326 array = a->a; 327 #endif 328 329 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 330 ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 331 ierr = PetscFree(col);CHKERRQ(ierr); 332 ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 333 cols = rows + bs; 334 for (i=0; i<mbs; i++) { 335 cols[0] = i*bs; 336 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 337 len = ai[i+1] - ai[i]; 338 for (j=0; j<len; j++) { 339 rows[0] = (*aj++)*bs; 340 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 341 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 342 array += bs2; 343 } 344 } 345 ierr = PetscFree(rows);CHKERRQ(ierr); 346 #if defined(PETSC_USE_MAT_SINGLE) 347 ierr = PetscFree(array); 348 #endif 349 350 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 351 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 352 353 if (B) { 354 *B = C; 355 } else { 356 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 357 } 358 PetscFunctionReturn(0); 359 } 360 361 #undef __FUNCT__ 362 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 363 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 364 { 365 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 366 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 367 Scalar *aa; 368 FILE *file; 369 370 PetscFunctionBegin; 371 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 372 ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 373 col_lens[0] = MAT_COOKIE; 374 375 col_lens[1] = A->m; 376 col_lens[2] = A->n; 377 col_lens[3] = a->nz*bs2; 378 379 /* store lengths of each row and write (including header) to file */ 380 count = 0; 381 for (i=0; i<a->mbs; i++) { 382 for (j=0; j<bs; j++) { 383 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 384 } 385 } 386 ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 387 ierr = PetscFree(col_lens);CHKERRQ(ierr); 388 389 /* store column indices (zero start index) */ 390 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 391 count = 0; 392 for (i=0; i<a->mbs; i++) { 393 for (j=0; j<bs; j++) { 394 for (k=a->i[i]; k<a->i[i+1]; k++) { 395 for (l=0; l<bs; l++) { 396 jj[count++] = bs*a->j[k] + l; 397 } 398 } 399 } 400 } 401 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 402 ierr = PetscFree(jj);CHKERRQ(ierr); 403 404 /* store nonzero values */ 405 ierr = PetscMalloc((a->nz+1)*bs2*sizeof(Scalar),&aa);CHKERRQ(ierr); 406 count = 0; 407 for (i=0; i<a->mbs; i++) { 408 for (j=0; j<bs; j++) { 409 for (k=a->i[i]; k<a->i[i+1]; k++) { 410 for (l=0; l<bs; l++) { 411 aa[count++] = a->a[bs2*k + l*bs + j]; 412 } 413 } 414 } 415 } 416 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 417 ierr = PetscFree(aa);CHKERRQ(ierr); 418 419 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 420 if (file) { 421 fprintf(file,"-matload_block_size %d\n",a->bs); 422 } 423 PetscFunctionReturn(0); 424 } 425 426 #undef __FUNCT__ 427 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 428 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 429 { 430 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 431 int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 432 PetscViewerFormat format; 433 434 PetscFunctionBegin; 435 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 436 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) { 437 ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 438 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 439 SETERRQ(PETSC_ERR_SUP,"Matlab format not supported"); 440 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 441 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 442 for (i=0; i<a->mbs; i++) { 443 for (j=0; j<bs; j++) { 444 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 445 for (k=a->i[i]; k<a->i[i+1]; k++) { 446 for (l=0; l<bs; l++) { 447 #if defined(PETSC_USE_COMPLEX) 448 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 449 ierr = PetscViewerASCIIPrintf(viewer," %d %g + %gi",bs*a->j[k]+l, 450 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 451 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 452 ierr = PetscViewerASCIIPrintf(viewer," %d %g - %gi",bs*a->j[k]+l, 453 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 454 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 455 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 456 } 457 #else 458 if (a->a[bs2*k + l*bs + j] != 0.0) { 459 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 460 } 461 #endif 462 } 463 } 464 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 465 } 466 } 467 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 468 } else { 469 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 470 for (i=0; i<a->mbs; i++) { 471 for (j=0; j<bs; j++) { 472 ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 473 for (k=a->i[i]; k<a->i[i+1]; k++) { 474 for (l=0; l<bs; l++) { 475 #if defined(PETSC_USE_COMPLEX) 476 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 477 ierr = PetscViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l, 478 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 479 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 480 ierr = PetscViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l, 481 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 482 } else { 483 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 484 } 485 #else 486 ierr = PetscViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 487 #endif 488 } 489 } 490 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 491 } 492 } 493 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 494 } 495 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 496 PetscFunctionReturn(0); 497 } 498 499 #undef __FUNCT__ 500 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 501 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 502 { 503 Mat A = (Mat) Aa; 504 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 505 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 506 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 507 MatScalar *aa; 508 PetscViewer viewer; 509 510 PetscFunctionBegin; 511 512 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 513 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 514 515 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 516 517 /* loop over matrix elements drawing boxes */ 518 color = PETSC_DRAW_BLUE; 519 for (i=0,row=0; i<mbs; i++,row+=bs) { 520 for (j=a->i[i]; j<a->i[i+1]; j++) { 521 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 522 x_l = a->j[j]*bs; x_r = x_l + 1.0; 523 aa = a->a + j*bs2; 524 for (k=0; k<bs; k++) { 525 for (l=0; l<bs; l++) { 526 if (PetscRealPart(*aa++) >= 0.) continue; 527 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 528 } 529 } 530 } 531 } 532 color = PETSC_DRAW_CYAN; 533 for (i=0,row=0; i<mbs; i++,row+=bs) { 534 for (j=a->i[i]; j<a->i[i+1]; j++) { 535 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 536 x_l = a->j[j]*bs; x_r = x_l + 1.0; 537 aa = a->a + j*bs2; 538 for (k=0; k<bs; k++) { 539 for (l=0; l<bs; l++) { 540 if (PetscRealPart(*aa++) != 0.) continue; 541 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 542 } 543 } 544 } 545 } 546 547 color = PETSC_DRAW_RED; 548 for (i=0,row=0; i<mbs; i++,row+=bs) { 549 for (j=a->i[i]; j<a->i[i+1]; j++) { 550 y_l = A->m - row - 1.0; y_r = y_l + 1.0; 551 x_l = a->j[j]*bs; x_r = x_l + 1.0; 552 aa = a->a + j*bs2; 553 for (k=0; k<bs; k++) { 554 for (l=0; l<bs; l++) { 555 if (PetscRealPart(*aa++) <= 0.) continue; 556 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 557 } 558 } 559 } 560 } 561 PetscFunctionReturn(0); 562 } 563 564 #undef __FUNCT__ 565 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 566 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 567 { 568 int ierr; 569 PetscReal xl,yl,xr,yr,w,h; 570 PetscDraw draw; 571 PetscTruth isnull; 572 573 PetscFunctionBegin; 574 575 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 576 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 577 578 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 579 xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 580 xr += w; yr += h; xl = -w; yl = -h; 581 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 582 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 583 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 584 PetscFunctionReturn(0); 585 } 586 587 #undef __FUNCT__ 588 #define __FUNCT__ "MatView_SeqBAIJ" 589 int MatView_SeqBAIJ(Mat A,PetscViewer viewer) 590 { 591 int ierr; 592 PetscTruth issocket,isascii,isbinary,isdraw; 593 594 PetscFunctionBegin; 595 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 596 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 597 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 598 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 599 if (issocket) { 600 SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 601 } else if (isascii){ 602 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 603 } else if (isbinary) { 604 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 605 } else if (isdraw) { 606 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 607 } else { 608 SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 609 } 610 PetscFunctionReturn(0); 611 } 612 613 614 #undef __FUNCT__ 615 #define __FUNCT__ "MatGetValues_SeqBAIJ" 616 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 617 { 618 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 619 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 620 int *ai = a->i,*ailen = a->ilen; 621 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 622 MatScalar *ap,*aa = a->a,zero = 0.0; 623 624 PetscFunctionBegin; 625 for (k=0; k<m; k++) { /* loop over rows */ 626 row = im[k]; brow = row/bs; 627 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 628 if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 629 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 630 nrow = ailen[brow]; 631 for (l=0; l<n; l++) { /* loop over columns */ 632 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 633 if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 634 col = in[l] ; 635 bcol = col/bs; 636 cidx = col%bs; 637 ridx = row%bs; 638 high = nrow; 639 low = 0; /* assume unsorted */ 640 while (high-low > 5) { 641 t = (low+high)/2; 642 if (rp[t] > bcol) high = t; 643 else low = t; 644 } 645 for (i=low; i<high; i++) { 646 if (rp[i] > bcol) break; 647 if (rp[i] == bcol) { 648 *v++ = ap[bs2*i+bs*cidx+ridx]; 649 goto finished; 650 } 651 } 652 *v++ = zero; 653 finished:; 654 } 655 } 656 PetscFunctionReturn(0); 657 } 658 659 #if defined(PETSC_USE_MAT_SINGLE) 660 #undef __FUNCT__ 661 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 662 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 663 { 664 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 665 int ierr,i,N = m*n*b->bs2; 666 MatScalar *vsingle; 667 668 PetscFunctionBegin; 669 if (N > b->setvalueslen) { 670 if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 671 ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 672 b->setvalueslen = N; 673 } 674 vsingle = b->setvaluescopy; 675 for (i=0; i<N; i++) { 676 vsingle[i] = v[i]; 677 } 678 ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 679 PetscFunctionReturn(0); 680 } 681 #endif 682 683 684 #undef __FUNCT__ 685 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 686 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is) 687 { 688 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 689 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 690 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 691 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 692 PetscTruth roworiented=a->roworiented; 693 MatScalar *value = v,*ap,*aa = a->a,*bap; 694 695 PetscFunctionBegin; 696 if (roworiented) { 697 stepval = (n-1)*bs; 698 } else { 699 stepval = (m-1)*bs; 700 } 701 for (k=0; k<m; k++) { /* loop over added rows */ 702 row = im[k]; 703 if (row < 0) continue; 704 #if defined(PETSC_USE_BOPT_g) 705 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 706 #endif 707 rp = aj + ai[row]; 708 ap = aa + bs2*ai[row]; 709 rmax = imax[row]; 710 nrow = ailen[row]; 711 low = 0; 712 for (l=0; l<n; l++) { /* loop over added columns */ 713 if (in[l] < 0) continue; 714 #if defined(PETSC_USE_BOPT_g) 715 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 716 #endif 717 col = in[l]; 718 if (roworiented) { 719 value = v + k*(stepval+bs)*bs + l*bs; 720 } else { 721 value = v + l*(stepval+bs)*bs + k*bs; 722 } 723 if (!sorted) low = 0; high = nrow; 724 while (high-low > 7) { 725 t = (low+high)/2; 726 if (rp[t] > col) high = t; 727 else low = t; 728 } 729 for (i=low; i<high; i++) { 730 if (rp[i] > col) break; 731 if (rp[i] == col) { 732 bap = ap + bs2*i; 733 if (roworiented) { 734 if (is == ADD_VALUES) { 735 for (ii=0; ii<bs; ii++,value+=stepval) { 736 for (jj=ii; jj<bs2; jj+=bs) { 737 bap[jj] += *value++; 738 } 739 } 740 } else { 741 for (ii=0; ii<bs; ii++,value+=stepval) { 742 for (jj=ii; jj<bs2; jj+=bs) { 743 bap[jj] = *value++; 744 } 745 } 746 } 747 } else { 748 if (is == ADD_VALUES) { 749 for (ii=0; ii<bs; ii++,value+=stepval) { 750 for (jj=0; jj<bs; jj++) { 751 *bap++ += *value++; 752 } 753 } 754 } else { 755 for (ii=0; ii<bs; ii++,value+=stepval) { 756 for (jj=0; jj<bs; jj++) { 757 *bap++ = *value++; 758 } 759 } 760 } 761 } 762 goto noinsert2; 763 } 764 } 765 if (nonew == 1) goto noinsert2; 766 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 767 if (nrow >= rmax) { 768 /* there is no extra room in row, therefore enlarge */ 769 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 770 MatScalar *new_a; 771 772 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 773 774 /* malloc new storage space */ 775 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 776 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 777 new_j = (int*)(new_a + bs2*new_nz); 778 new_i = new_j + new_nz; 779 780 /* copy over old data into new slots */ 781 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 782 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 783 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 784 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 785 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 786 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 787 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 788 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 789 /* free up old matrix storage */ 790 ierr = PetscFree(a->a);CHKERRQ(ierr); 791 if (!a->singlemalloc) { 792 ierr = PetscFree(a->i);CHKERRQ(ierr); 793 ierr = PetscFree(a->j);CHKERRQ(ierr); 794 } 795 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 796 a->singlemalloc = PETSC_TRUE; 797 798 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 799 rmax = imax[row] = imax[row] + CHUNKSIZE; 800 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 801 a->maxnz += bs2*CHUNKSIZE; 802 a->reallocs++; 803 a->nz++; 804 } 805 N = nrow++ - 1; 806 /* shift up all the later entries in this row */ 807 for (ii=N; ii>=i; ii--) { 808 rp[ii+1] = rp[ii]; 809 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 810 } 811 if (N >= i) { 812 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 813 } 814 rp[i] = col; 815 bap = ap + bs2*i; 816 if (roworiented) { 817 for (ii=0; ii<bs; ii++,value+=stepval) { 818 for (jj=ii; jj<bs2; jj+=bs) { 819 bap[jj] = *value++; 820 } 821 } 822 } else { 823 for (ii=0; ii<bs; ii++,value+=stepval) { 824 for (jj=0; jj<bs; jj++) { 825 *bap++ = *value++; 826 } 827 } 828 } 829 noinsert2:; 830 low = i; 831 } 832 ailen[row] = nrow; 833 } 834 PetscFunctionReturn(0); 835 } 836 837 838 #undef __FUNCT__ 839 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 840 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 841 { 842 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 843 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 844 int m = A->m,*ip,N,*ailen = a->ilen; 845 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 846 MatScalar *aa = a->a,*ap; 847 848 PetscFunctionBegin; 849 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 850 851 if (m) rmax = ailen[0]; 852 for (i=1; i<mbs; i++) { 853 /* move each row back by the amount of empty slots (fshift) before it*/ 854 fshift += imax[i-1] - ailen[i-1]; 855 rmax = PetscMax(rmax,ailen[i]); 856 if (fshift) { 857 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 858 N = ailen[i]; 859 for (j=0; j<N; j++) { 860 ip[j-fshift] = ip[j]; 861 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 862 } 863 } 864 ai[i] = ai[i-1] + ailen[i-1]; 865 } 866 if (mbs) { 867 fshift += imax[mbs-1] - ailen[mbs-1]; 868 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 869 } 870 /* reset ilen and imax for each row */ 871 for (i=0; i<mbs; i++) { 872 ailen[i] = imax[i] = ai[i+1] - ai[i]; 873 } 874 a->nz = ai[mbs]; 875 876 /* diagonals may have moved, so kill the diagonal pointers */ 877 if (fshift && a->diag) { 878 ierr = PetscFree(a->diag);CHKERRQ(ierr); 879 PetscLogObjectMemory(A,-(m+1)*sizeof(int)); 880 a->diag = 0; 881 } 882 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 883 m,A->n,a->bs,fshift*bs2,a->nz*bs2); 884 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 885 a->reallocs); 886 PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 887 a->reallocs = 0; 888 A->info.nz_unneeded = (PetscReal)fshift*bs2; 889 890 PetscFunctionReturn(0); 891 } 892 893 894 895 /* 896 This function returns an array of flags which indicate the locations of contiguous 897 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 898 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 899 Assume: sizes should be long enough to hold all the values. 900 */ 901 #undef __FUNCT__ 902 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 903 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 904 { 905 int i,j,k,row; 906 PetscTruth flg; 907 908 PetscFunctionBegin; 909 for (i=0,j=0; i<n; j++) { 910 row = idx[i]; 911 if (row%bs!=0) { /* Not the begining of a block */ 912 sizes[j] = 1; 913 i++; 914 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 915 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 916 i++; 917 } else { /* Begining of the block, so check if the complete block exists */ 918 flg = PETSC_TRUE; 919 for (k=1; k<bs; k++) { 920 if (row+k != idx[i+k]) { /* break in the block */ 921 flg = PETSC_FALSE; 922 break; 923 } 924 } 925 if (flg == PETSC_TRUE) { /* No break in the bs */ 926 sizes[j] = bs; 927 i+= bs; 928 } else { 929 sizes[j] = 1; 930 i++; 931 } 932 } 933 } 934 *bs_max = j; 935 PetscFunctionReturn(0); 936 } 937 938 #undef __FUNCT__ 939 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 940 int MatZeroRows_SeqBAIJ(Mat A,IS is,Scalar *diag) 941 { 942 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 943 int ierr,i,j,k,count,is_n,*is_idx,*rows; 944 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 945 Scalar zero = 0.0; 946 MatScalar *aa; 947 948 PetscFunctionBegin; 949 /* Make a copy of the IS and sort it */ 950 ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 951 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 952 953 /* allocate memory for rows,sizes */ 954 ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 955 sizes = rows + is_n; 956 957 /* copy IS values to rows, and sort them */ 958 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 959 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 960 if (baij->keepzeroedrows) { 961 for (i=0; i<is_n; i++) { sizes[i] = 1; } 962 bs_max = is_n; 963 } else { 964 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 965 } 966 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 967 968 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 969 row = rows[j]; 970 if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 971 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 972 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 973 if (sizes[i] == bs && !baij->keepzeroedrows) { 974 if (diag) { 975 if (baij->ilen[row/bs] > 0) { 976 baij->ilen[row/bs] = 1; 977 baij->j[baij->i[row/bs]] = row/bs; 978 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 979 } 980 /* Now insert all the diagonal values for this bs */ 981 for (k=0; k<bs; k++) { 982 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 983 } 984 } else { /* (!diag) */ 985 baij->ilen[row/bs] = 0; 986 } /* end (!diag) */ 987 } else { /* (sizes[i] != bs) */ 988 #if defined (PETSC_USE_DEBUG) 989 if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 990 #endif 991 for (k=0; k<count; k++) { 992 aa[0] = zero; 993 aa += bs; 994 } 995 if (diag) { 996 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 997 } 998 } 999 } 1000 1001 ierr = PetscFree(rows);CHKERRQ(ierr); 1002 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1003 PetscFunctionReturn(0); 1004 } 1005 1006 #undef __FUNCT__ 1007 #define __FUNCT__ "MatSetValues_SeqBAIJ" 1008 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 1009 { 1010 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1011 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1012 int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1013 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1014 int ridx,cidx,bs2=a->bs2,ierr; 1015 PetscTruth roworiented=a->roworiented; 1016 MatScalar *ap,value,*aa=a->a,*bap; 1017 1018 PetscFunctionBegin; 1019 for (k=0; k<m; k++) { /* loop over added rows */ 1020 row = im[k]; brow = row/bs; 1021 if (row < 0) continue; 1022 #if defined(PETSC_USE_BOPT_g) 1023 if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 1024 #endif 1025 rp = aj + ai[brow]; 1026 ap = aa + bs2*ai[brow]; 1027 rmax = imax[brow]; 1028 nrow = ailen[brow]; 1029 low = 0; 1030 for (l=0; l<n; l++) { /* loop over added columns */ 1031 if (in[l] < 0) continue; 1032 #if defined(PETSC_USE_BOPT_g) 1033 if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n); 1034 #endif 1035 col = in[l]; bcol = col/bs; 1036 ridx = row % bs; cidx = col % bs; 1037 if (roworiented) { 1038 value = v[l + k*n]; 1039 } else { 1040 value = v[k + l*m]; 1041 } 1042 if (!sorted) low = 0; high = nrow; 1043 while (high-low > 7) { 1044 t = (low+high)/2; 1045 if (rp[t] > bcol) high = t; 1046 else low = t; 1047 } 1048 for (i=low; i<high; i++) { 1049 if (rp[i] > bcol) break; 1050 if (rp[i] == bcol) { 1051 bap = ap + bs2*i + bs*cidx + ridx; 1052 if (is == ADD_VALUES) *bap += value; 1053 else *bap = value; 1054 goto noinsert1; 1055 } 1056 } 1057 if (nonew == 1) goto noinsert1; 1058 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1059 if (nrow >= rmax) { 1060 /* there is no extra room in row, therefore enlarge */ 1061 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1062 MatScalar *new_a; 1063 1064 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 1065 1066 /* Malloc new storage space */ 1067 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1068 ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 1069 new_j = (int*)(new_a + bs2*new_nz); 1070 new_i = new_j + new_nz; 1071 1072 /* copy over old data into new slots */ 1073 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1074 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1075 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1076 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1077 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1078 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1079 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1080 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1081 /* free up old matrix storage */ 1082 ierr = PetscFree(a->a);CHKERRQ(ierr); 1083 if (!a->singlemalloc) { 1084 ierr = PetscFree(a->i);CHKERRQ(ierr); 1085 ierr = PetscFree(a->j);CHKERRQ(ierr); 1086 } 1087 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1088 a->singlemalloc = PETSC_TRUE; 1089 1090 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1091 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1092 PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1093 a->maxnz += bs2*CHUNKSIZE; 1094 a->reallocs++; 1095 a->nz++; 1096 } 1097 N = nrow++ - 1; 1098 /* shift up all the later entries in this row */ 1099 for (ii=N; ii>=i; ii--) { 1100 rp[ii+1] = rp[ii]; 1101 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1102 } 1103 if (N>=i) { 1104 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1105 } 1106 rp[i] = bcol; 1107 ap[bs2*i + bs*cidx + ridx] = value; 1108 noinsert1:; 1109 low = i; 1110 } 1111 ailen[brow] = nrow; 1112 } 1113 PetscFunctionReturn(0); 1114 } 1115 1116 1117 #undef __FUNCT__ 1118 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 1119 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info) 1120 { 1121 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1122 Mat outA; 1123 int ierr; 1124 PetscTruth row_identity,col_identity; 1125 1126 PetscFunctionBegin; 1127 if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1128 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1129 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1130 if (!row_identity || !col_identity) { 1131 SETERRQ(1,"Row and column permutations must be identity for in-place ILU"); 1132 } 1133 1134 outA = inA; 1135 inA->factor = FACTOR_LU; 1136 1137 if (!a->diag) { 1138 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1139 } 1140 /* 1141 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1142 for ILU(0) factorization with natural ordering 1143 */ 1144 if (a->bs < 8) { 1145 ierr = MatSeqBAIJ_UpdateFactor_NaturalOrdering(inA);CHKERRQ(ierr); 1146 } else { 1147 a->row = row; 1148 a->col = col; 1149 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1150 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1151 1152 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1153 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1154 PetscLogObjectParent(inA,a->icol); 1155 1156 if (!a->solve_work) { 1157 ierr = PetscMalloc((inA->m+a->bs)*sizeof(Scalar),&a->solve_work);CHKERRQ(ierr); 1158 PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(Scalar)); 1159 } 1160 } 1161 1162 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1163 1164 PetscFunctionReturn(0); 1165 } 1166 #undef __FUNCT__ 1167 #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1168 int MatPrintHelp_SeqBAIJ(Mat A) 1169 { 1170 static PetscTruth called = PETSC_FALSE; 1171 MPI_Comm comm = A->comm; 1172 int ierr; 1173 1174 PetscFunctionBegin; 1175 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1176 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1177 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1178 PetscFunctionReturn(0); 1179 } 1180 1181 EXTERN_C_BEGIN 1182 #undef __FUNCT__ 1183 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1184 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1185 { 1186 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1187 int i,nz,n; 1188 1189 PetscFunctionBegin; 1190 nz = baij->maxnz; 1191 n = mat->n; 1192 for (i=0; i<nz; i++) { 1193 baij->j[i] = indices[i]; 1194 } 1195 baij->nz = nz; 1196 for (i=0; i<n; i++) { 1197 baij->ilen[i] = baij->imax[i]; 1198 } 1199 1200 PetscFunctionReturn(0); 1201 } 1202 EXTERN_C_END 1203 1204 #undef __FUNCT__ 1205 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 1206 /*@ 1207 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1208 in the matrix. 1209 1210 Input Parameters: 1211 + mat - the SeqBAIJ matrix 1212 - indices - the column indices 1213 1214 Level: advanced 1215 1216 Notes: 1217 This can be called if you have precomputed the nonzero structure of the 1218 matrix and want to provide it to the matrix object to improve the performance 1219 of the MatSetValues() operation. 1220 1221 You MUST have set the correct numbers of nonzeros per row in the call to 1222 MatCreateSeqBAIJ(). 1223 1224 MUST be called before any calls to MatSetValues(); 1225 1226 @*/ 1227 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1228 { 1229 int ierr,(*f)(Mat,int *); 1230 1231 PetscFunctionBegin; 1232 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1233 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)())&f);CHKERRQ(ierr); 1234 if (f) { 1235 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1236 } else { 1237 SETERRQ(1,"Wrong type of matrix to set column indices"); 1238 } 1239 PetscFunctionReturn(0); 1240 } 1241 1242 #undef __FUNCT__ 1243 #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1244 int MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1245 { 1246 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1247 int ierr,i,j,n,row,bs,*ai,*aj,mbs; 1248 PetscReal atmp; 1249 Scalar *x,zero = 0.0; 1250 MatScalar *aa; 1251 int ncols,brow,krow,kcol; 1252 1253 PetscFunctionBegin; 1254 if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1255 bs = a->bs; 1256 aa = a->a; 1257 ai = a->i; 1258 aj = a->j; 1259 mbs = a->mbs; 1260 1261 ierr = VecSet(&zero,v);CHKERRQ(ierr); 1262 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1263 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1264 if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1265 for (i=0; i<mbs; i++) { 1266 ncols = ai[1] - ai[0]; ai++; 1267 brow = bs*i; 1268 for (j=0; j<ncols; j++){ 1269 /* bcol = bs*(*aj); */ 1270 for (kcol=0; kcol<bs; kcol++){ 1271 for (krow=0; krow<bs; krow++){ 1272 atmp = PetscAbsScalar(*aa); aa++; 1273 row = brow + krow; /* row index */ 1274 /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1275 if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1276 } 1277 } 1278 aj++; 1279 } 1280 } 1281 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1282 PetscFunctionReturn(0); 1283 } 1284 1285 #undef __FUNCT__ 1286 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1287 int MatSetUpPreallocation_SeqBAIJ(Mat A) 1288 { 1289 int ierr; 1290 1291 PetscFunctionBegin; 1292 ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1293 PetscFunctionReturn(0); 1294 } 1295 1296 #undef __FUNCT__ 1297 #define __FUNCT__ "MatGetArray_SeqBAIJ" 1298 int MatGetArray_SeqBAIJ(Mat A,Scalar **array) 1299 { 1300 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1301 PetscFunctionBegin; 1302 *array = a->a; 1303 PetscFunctionReturn(0); 1304 } 1305 1306 #undef __FUNCT__ 1307 #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 1308 int MatRestoreArray_SeqBAIJ(Mat A,Scalar **array) 1309 { 1310 PetscFunctionBegin; 1311 PetscFunctionReturn(0); 1312 } 1313 1314 /* -------------------------------------------------------------------*/ 1315 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1316 MatGetRow_SeqBAIJ, 1317 MatRestoreRow_SeqBAIJ, 1318 MatMult_SeqBAIJ_N, 1319 MatMultAdd_SeqBAIJ_N, 1320 MatMultTranspose_SeqBAIJ, 1321 MatMultTransposeAdd_SeqBAIJ, 1322 MatSolve_SeqBAIJ_N, 1323 0, 1324 0, 1325 0, 1326 MatLUFactor_SeqBAIJ, 1327 0, 1328 0, 1329 MatTranspose_SeqBAIJ, 1330 MatGetInfo_SeqBAIJ, 1331 MatEqual_SeqBAIJ, 1332 MatGetDiagonal_SeqBAIJ, 1333 MatDiagonalScale_SeqBAIJ, 1334 MatNorm_SeqBAIJ, 1335 0, 1336 MatAssemblyEnd_SeqBAIJ, 1337 0, 1338 MatSetOption_SeqBAIJ, 1339 MatZeroEntries_SeqBAIJ, 1340 MatZeroRows_SeqBAIJ, 1341 MatLUFactorSymbolic_SeqBAIJ, 1342 MatLUFactorNumeric_SeqBAIJ_N, 1343 0, 1344 0, 1345 MatSetUpPreallocation_SeqBAIJ, 1346 0, 1347 MatGetOwnershipRange_SeqBAIJ, 1348 MatILUFactorSymbolic_SeqBAIJ, 1349 0, 1350 MatGetArray_SeqBAIJ, 1351 MatRestoreArray_SeqBAIJ, 1352 MatDuplicate_SeqBAIJ, 1353 0, 1354 0, 1355 MatILUFactor_SeqBAIJ, 1356 0, 1357 0, 1358 MatGetSubMatrices_SeqBAIJ, 1359 MatIncreaseOverlap_SeqBAIJ, 1360 MatGetValues_SeqBAIJ, 1361 0, 1362 MatPrintHelp_SeqBAIJ, 1363 MatScale_SeqBAIJ, 1364 0, 1365 0, 1366 0, 1367 MatGetBlockSize_SeqBAIJ, 1368 MatGetRowIJ_SeqBAIJ, 1369 MatRestoreRowIJ_SeqBAIJ, 1370 0, 1371 0, 1372 0, 1373 0, 1374 0, 1375 0, 1376 MatSetValuesBlocked_SeqBAIJ, 1377 MatGetSubMatrix_SeqBAIJ, 1378 MatDestroy_SeqBAIJ, 1379 MatView_SeqBAIJ, 1380 MatGetMaps_Petsc, 1381 0, 1382 0, 1383 0, 1384 0, 1385 0, 1386 0, 1387 MatGetRowMax_SeqBAIJ, 1388 MatConvert_Basic}; 1389 1390 EXTERN_C_BEGIN 1391 #undef __FUNCT__ 1392 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 1393 int MatStoreValues_SeqBAIJ(Mat mat) 1394 { 1395 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1396 int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1397 int ierr; 1398 1399 PetscFunctionBegin; 1400 if (aij->nonew != 1) { 1401 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1402 } 1403 1404 /* allocate space for values if not already there */ 1405 if (!aij->saved_values) { 1406 ierr = PetscMalloc((nz+1)*sizeof(Scalar),&aij->saved_values);CHKERRQ(ierr); 1407 } 1408 1409 /* copy values over */ 1410 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr); 1411 PetscFunctionReturn(0); 1412 } 1413 EXTERN_C_END 1414 1415 EXTERN_C_BEGIN 1416 #undef __FUNCT__ 1417 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 1418 int MatRetrieveValues_SeqBAIJ(Mat mat) 1419 { 1420 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1421 int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 1422 1423 PetscFunctionBegin; 1424 if (aij->nonew != 1) { 1425 SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1426 } 1427 if (!aij->saved_values) { 1428 SETERRQ(1,"Must call MatStoreValues(A);first"); 1429 } 1430 1431 /* copy values over */ 1432 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr); 1433 PetscFunctionReturn(0); 1434 } 1435 EXTERN_C_END 1436 1437 EXTERN_C_BEGIN 1438 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*); 1439 EXTERN_C_END 1440 1441 EXTERN_C_BEGIN 1442 #undef __FUNCT__ 1443 #define __FUNCT__ "MatCreate_SeqBAIJ" 1444 int MatCreate_SeqBAIJ(Mat B) 1445 { 1446 int ierr,size; 1447 Mat_SeqBAIJ *b; 1448 1449 PetscFunctionBegin; 1450 ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 1451 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1452 1453 B->m = B->M = PetscMax(B->m,B->M); 1454 B->n = B->N = PetscMax(B->n,B->N); 1455 ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 1456 B->data = (void*)b; 1457 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1458 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1459 B->factor = 0; 1460 B->lupivotthreshold = 1.0; 1461 B->mapping = 0; 1462 b->row = 0; 1463 b->col = 0; 1464 b->icol = 0; 1465 b->reallocs = 0; 1466 b->saved_values = 0; 1467 #if defined(PETSC_USE_MAT_SINGLE) 1468 b->setvalueslen = 0; 1469 b->setvaluescopy = PETSC_NULL; 1470 #endif 1471 b->single_precision_solves = PETSC_FALSE; 1472 1473 ierr = MapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 1474 ierr = MapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 1475 1476 b->sorted = PETSC_FALSE; 1477 b->roworiented = PETSC_TRUE; 1478 b->nonew = 0; 1479 b->diag = 0; 1480 b->solve_work = 0; 1481 b->mult_work = 0; 1482 b->spptr = 0; 1483 B->info.nz_unneeded = (PetscReal)b->maxnz; 1484 b->keepzeroedrows = PETSC_FALSE; 1485 1486 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1487 "MatStoreValues_SeqBAIJ", 1488 MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1489 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1490 "MatRetrieveValues_SeqBAIJ", 1491 MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1492 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1493 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1494 MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1495 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C", 1496 "MatConvert_SeqBAIJ_SeqAIJ", 1497 MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 1498 PetscFunctionReturn(0); 1499 } 1500 EXTERN_C_END 1501 1502 #undef __FUNCT__ 1503 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 1504 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1505 { 1506 Mat C; 1507 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 1508 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1509 1510 PetscFunctionBegin; 1511 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 1512 1513 *B = 0; 1514 ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1515 ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr); 1516 c = (Mat_SeqBAIJ*)C->data; 1517 1518 c->bs = a->bs; 1519 c->bs2 = a->bs2; 1520 c->mbs = a->mbs; 1521 c->nbs = a->nbs; 1522 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1523 1524 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1525 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1526 for (i=0; i<mbs; i++) { 1527 c->imax[i] = a->imax[i]; 1528 c->ilen[i] = a->ilen[i]; 1529 } 1530 1531 /* allocate the matrix space */ 1532 c->singlemalloc = PETSC_TRUE; 1533 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1534 ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 1535 c->j = (int*)(c->a + nz*bs2); 1536 c->i = c->j + nz; 1537 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1538 if (mbs > 0) { 1539 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1540 if (cpvalues == MAT_COPY_VALUES) { 1541 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1542 } else { 1543 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1544 } 1545 } 1546 1547 PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1548 c->sorted = a->sorted; 1549 c->roworiented = a->roworiented; 1550 c->nonew = a->nonew; 1551 1552 if (a->diag) { 1553 ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1554 PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1555 for (i=0; i<mbs; i++) { 1556 c->diag[i] = a->diag[i]; 1557 } 1558 } else c->diag = 0; 1559 c->nz = a->nz; 1560 c->maxnz = a->maxnz; 1561 c->solve_work = 0; 1562 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1563 c->mult_work = 0; 1564 C->preallocated = PETSC_TRUE; 1565 C->assembled = PETSC_TRUE; 1566 *B = C; 1567 ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1568 PetscFunctionReturn(0); 1569 } 1570 1571 EXTERN_C_BEGIN 1572 #undef __FUNCT__ 1573 #define __FUNCT__ "MatLoad_SeqBAIJ" 1574 int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A) 1575 { 1576 Mat_SeqBAIJ *a; 1577 Mat B; 1578 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1579 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1580 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1581 int *masked,nmask,tmp,bs2,ishift; 1582 Scalar *aa; 1583 MPI_Comm comm = ((PetscObject)viewer)->comm; 1584 1585 PetscFunctionBegin; 1586 ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1587 bs2 = bs*bs; 1588 1589 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1590 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1591 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1592 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1593 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 1594 M = header[1]; N = header[2]; nz = header[3]; 1595 1596 if (header[3] < 0) { 1597 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 1598 } 1599 1600 if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 1601 1602 /* 1603 This code adds extra rows to make sure the number of rows is 1604 divisible by the blocksize 1605 */ 1606 mbs = M/bs; 1607 extra_rows = bs - M + bs*(mbs); 1608 if (extra_rows == bs) extra_rows = 0; 1609 else mbs++; 1610 if (extra_rows) { 1611 PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1612 } 1613 1614 /* read in row lengths */ 1615 ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 1616 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1617 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1618 1619 /* read in column indices */ 1620 ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 1621 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1622 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1623 1624 /* loop over row lengths determining block row lengths */ 1625 ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 1626 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1627 ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1628 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1629 masked = mask + mbs; 1630 rowcount = 0; nzcount = 0; 1631 for (i=0; i<mbs; i++) { 1632 nmask = 0; 1633 for (j=0; j<bs; j++) { 1634 kmax = rowlengths[rowcount]; 1635 for (k=0; k<kmax; k++) { 1636 tmp = jj[nzcount++]/bs; 1637 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1638 } 1639 rowcount++; 1640 } 1641 browlengths[i] += nmask; 1642 /* zero out the mask elements we set */ 1643 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1644 } 1645 1646 /* create our matrix */ 1647 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1648 B = *A; 1649 a = (Mat_SeqBAIJ*)B->data; 1650 1651 /* set matrix "i" values */ 1652 a->i[0] = 0; 1653 for (i=1; i<= mbs; i++) { 1654 a->i[i] = a->i[i-1] + browlengths[i-1]; 1655 a->ilen[i-1] = browlengths[i-1]; 1656 } 1657 a->nz = 0; 1658 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 1659 1660 /* read in nonzero values */ 1661 ierr = PetscMalloc((nz+extra_rows)*sizeof(Scalar),&aa);CHKERRQ(ierr); 1662 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1663 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1664 1665 /* set "a" and "j" values into matrix */ 1666 nzcount = 0; jcount = 0; 1667 for (i=0; i<mbs; i++) { 1668 nzcountb = nzcount; 1669 nmask = 0; 1670 for (j=0; j<bs; j++) { 1671 kmax = rowlengths[i*bs+j]; 1672 for (k=0; k<kmax; k++) { 1673 tmp = jj[nzcount++]/bs; 1674 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1675 } 1676 } 1677 /* sort the masked values */ 1678 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1679 1680 /* set "j" values into matrix */ 1681 maskcount = 1; 1682 for (j=0; j<nmask; j++) { 1683 a->j[jcount++] = masked[j]; 1684 mask[masked[j]] = maskcount++; 1685 } 1686 /* set "a" values into matrix */ 1687 ishift = bs2*a->i[i]; 1688 for (j=0; j<bs; j++) { 1689 kmax = rowlengths[i*bs+j]; 1690 for (k=0; k<kmax; k++) { 1691 tmp = jj[nzcountb]/bs ; 1692 block = mask[tmp] - 1; 1693 point = jj[nzcountb] - bs*tmp; 1694 idx = ishift + bs2*block + j + bs*point; 1695 a->a[idx] = (MatScalar)aa[nzcountb++]; 1696 } 1697 } 1698 /* zero out the mask elements we set */ 1699 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1700 } 1701 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1702 1703 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1704 ierr = PetscFree(browlengths);CHKERRQ(ierr); 1705 ierr = PetscFree(aa);CHKERRQ(ierr); 1706 ierr = PetscFree(jj);CHKERRQ(ierr); 1707 ierr = PetscFree(mask);CHKERRQ(ierr); 1708 1709 B->assembled = PETSC_TRUE; 1710 1711 ierr = MatView_Private(B);CHKERRQ(ierr); 1712 PetscFunctionReturn(0); 1713 } 1714 EXTERN_C_END 1715 1716 #undef __FUNCT__ 1717 #define __FUNCT__ "MatCreateSeqBAIJ" 1718 /*@C 1719 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1720 compressed row) format. For good matrix assembly performance the 1721 user should preallocate the matrix storage by setting the parameter nz 1722 (or the array nnz). By setting these parameters accurately, performance 1723 during matrix assembly can be increased by more than a factor of 50. 1724 1725 Collective on MPI_Comm 1726 1727 Input Parameters: 1728 + comm - MPI communicator, set to PETSC_COMM_SELF 1729 . bs - size of block 1730 . m - number of rows 1731 . n - number of columns 1732 . nz - number of nonzero blocks per block row (same for all rows) 1733 - nnz - array containing the number of nonzero blocks in the various block rows 1734 (possibly different for each block row) or PETSC_NULL 1735 1736 Output Parameter: 1737 . A - the matrix 1738 1739 Options Database Keys: 1740 . -mat_no_unroll - uses code that does not unroll the loops in the 1741 block calculations (much slower) 1742 . -mat_block_size - size of the blocks to use 1743 1744 Level: intermediate 1745 1746 Notes: 1747 A nonzero block is any block that as 1 or more nonzeros in it 1748 1749 The block AIJ format is fully compatible with standard Fortran 77 1750 storage. That is, the stored row and column indices can begin at 1751 either one (as in Fortran) or zero. See the users' manual for details. 1752 1753 Specify the preallocated storage with either nz or nnz (not both). 1754 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1755 allocation. For additional details, see the users manual chapter on 1756 matrices. 1757 1758 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1759 @*/ 1760 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1761 { 1762 int ierr; 1763 1764 PetscFunctionBegin; 1765 ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1766 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 1767 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1768 PetscFunctionReturn(0); 1769 } 1770 1771 #undef __FUNCT__ 1772 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 1773 /*@C 1774 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 1775 per row in the matrix. For good matrix assembly performance the 1776 user should preallocate the matrix storage by setting the parameter nz 1777 (or the array nnz). By setting these parameters accurately, performance 1778 during matrix assembly can be increased by more than a factor of 50. 1779 1780 Collective on MPI_Comm 1781 1782 Input Parameters: 1783 + A - the matrix 1784 . bs - size of block 1785 . nz - number of block nonzeros per block row (same for all rows) 1786 - nnz - array containing the number of block nonzeros in the various block rows 1787 (possibly different for each block row) or PETSC_NULL 1788 1789 Options Database Keys: 1790 . -mat_no_unroll - uses code that does not unroll the loops in the 1791 block calculations (much slower) 1792 . -mat_block_size - size of the blocks to use 1793 1794 Level: intermediate 1795 1796 Notes: 1797 The block AIJ format is fully compatible with standard Fortran 77 1798 storage. That is, the stored row and column indices can begin at 1799 either one (as in Fortran) or zero. See the users' manual for details. 1800 1801 Specify the preallocated storage with either nz or nnz (not both). 1802 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1803 allocation. For additional details, see the users manual chapter on 1804 matrices. 1805 1806 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1807 @*/ 1808 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1809 { 1810 Mat_SeqBAIJ *b; 1811 int i,len,ierr,mbs,nbs,bs2,newbs = bs; 1812 PetscTruth flg; 1813 1814 PetscFunctionBegin; 1815 ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr); 1816 if (!flg) PetscFunctionReturn(0); 1817 1818 B->preallocated = PETSC_TRUE; 1819 ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 1820 if (nnz && newbs != bs) { 1821 SETERRQ(1,"Cannot change blocksize from command line if setting nnz"); 1822 } 1823 bs = newbs; 1824 1825 mbs = B->m/bs; 1826 nbs = B->n/bs; 1827 bs2 = bs*bs; 1828 1829 if (mbs*bs!=B->m || nbs*bs!=B->n) { 1830 SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 1831 } 1832 1833 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1834 if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1835 if (nnz) { 1836 for (i=0; i<mbs; i++) { 1837 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1838 if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 1839 } 1840 } 1841 1842 b = (Mat_SeqBAIJ*)B->data; 1843 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1844 B->ops->solve = MatSolve_SeqBAIJ_Update; 1845 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 1846 if (!flg) { 1847 switch (bs) { 1848 case 1: 1849 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1850 B->ops->mult = MatMult_SeqBAIJ_1; 1851 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 1852 break; 1853 case 2: 1854 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1855 B->ops->mult = MatMult_SeqBAIJ_2; 1856 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 1857 break; 1858 case 3: 1859 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1860 B->ops->mult = MatMult_SeqBAIJ_3; 1861 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 1862 break; 1863 case 4: 1864 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1865 B->ops->mult = MatMult_SeqBAIJ_4; 1866 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 1867 break; 1868 case 5: 1869 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1870 B->ops->mult = MatMult_SeqBAIJ_5; 1871 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 1872 break; 1873 case 6: 1874 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 1875 B->ops->mult = MatMult_SeqBAIJ_6; 1876 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 1877 break; 1878 case 7: 1879 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 1880 B->ops->mult = MatMult_SeqBAIJ_7; 1881 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 1882 break; 1883 default: 1884 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 1885 B->ops->mult = MatMult_SeqBAIJ_N; 1886 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 1887 break; 1888 } 1889 } 1890 b->bs = bs; 1891 b->mbs = mbs; 1892 b->nbs = nbs; 1893 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 1894 if (!nnz) { 1895 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1896 else if (nz <= 0) nz = 1; 1897 for (i=0; i<mbs; i++) b->imax[i] = nz; 1898 nz = nz*mbs; 1899 } else { 1900 nz = 0; 1901 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1902 } 1903 1904 /* allocate the matrix space */ 1905 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 1906 ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 1907 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1908 b->j = (int*)(b->a + nz*bs2); 1909 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 1910 b->i = b->j + nz; 1911 b->singlemalloc = PETSC_TRUE; 1912 1913 b->i[0] = 0; 1914 for (i=1; i<mbs+1; i++) { 1915 b->i[i] = b->i[i-1] + b->imax[i-1]; 1916 } 1917 1918 /* b->ilen will count nonzeros in each block row so far. */ 1919 ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 1920 PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1921 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1922 1923 b->bs = bs; 1924 b->bs2 = bs2; 1925 b->mbs = mbs; 1926 b->nz = 0; 1927 b->maxnz = nz*bs2; 1928 B->info.nz_unneeded = (PetscReal)b->maxnz; 1929 PetscFunctionReturn(0); 1930 } 1931 1932