1 /*$Id: baij.c,v 1.197 2000/01/26 21:50:39 bsmith Exp balay $*/ 2 3 /* 4 Defines the basic matrix operations for the BAIJ (compressed row) 5 matrix storage format. 6 */ 7 #include "sys.h" 8 #include "src/mat/impls/baij/seq/baij.h" 9 #include "src/vec/vecimpl.h" 10 #include "src/inline/spops.h" 11 12 #define CHUNKSIZE 10 13 14 /* 15 Checks for missing diagonals 16 */ 17 #undef __FUNC__ 18 #define __FUNC__ "MatMissingDiagonal_SeqBAIJ" 19 int MatMissingDiagonal_SeqBAIJ(Mat A) 20 { 21 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 22 int *diag,*jj = a->j,i,ierr; 23 24 PetscFunctionBegin; 25 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 26 diag = a->diag; 27 for (i=0; i<a->mbs; i++) { 28 if (jj[diag[i]] != i) { 29 SETERRQ1(1,1,"Matrix is missing diagonal number %d",i); 30 } 31 } 32 PetscFunctionReturn(0); 33 } 34 35 #undef __FUNC__ 36 #define __FUNC__ "MatMarkDiagonal_SeqBAIJ" 37 int MatMarkDiagonal_SeqBAIJ(Mat A) 38 { 39 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 40 int i,j,*diag,m = a->mbs; 41 42 PetscFunctionBegin; 43 if (a->diag) PetscFunctionReturn(0); 44 45 diag = (int*)PetscMalloc((m+1)*sizeof(int));CHKPTRQ(diag); 46 PLogObjectMemory(A,(m+1)*sizeof(int)); 47 for (i=0; i<m; i++) { 48 diag[i] = a->i[i+1]; 49 for (j=a->i[i]; j<a->i[i+1]; j++) { 50 if (a->j[j] == i) { 51 diag[i] = j; 52 break; 53 } 54 } 55 } 56 a->diag = diag; 57 PetscFunctionReturn(0); 58 } 59 60 61 extern int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 62 63 #undef __FUNC__ 64 #define __FUNC__ "MatGetRowIJ_SeqBAIJ" 65 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 66 { 67 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 68 int ierr,n = a->mbs,i; 69 70 PetscFunctionBegin; 71 *nn = n; 72 if (!ia) PetscFunctionReturn(0); 73 if (symmetric) { 74 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 75 } else if (oshift == 1) { 76 /* temporarily add 1 to i and j indices */ 77 int nz = a->i[n] + 1; 78 for (i=0; i<nz; i++) a->j[i]++; 79 for (i=0; i<n+1; i++) a->i[i]++; 80 *ia = a->i; *ja = a->j; 81 } else { 82 *ia = a->i; *ja = a->j; 83 } 84 85 PetscFunctionReturn(0); 86 } 87 88 #undef __FUNC__ 89 #define __FUNC__ "MatRestoreRowIJ_SeqBAIJ" 90 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja, 91 PetscTruth *done) 92 { 93 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 94 int i,n = a->mbs,ierr; 95 96 PetscFunctionBegin; 97 if (!ia) PetscFunctionReturn(0); 98 if (symmetric) { 99 ierr = PetscFree(*ia);CHKERRQ(ierr); 100 ierr = PetscFree(*ja);CHKERRQ(ierr); 101 } else if (oshift == 1) { 102 int nz = a->i[n]; 103 for (i=0; i<nz; i++) a->j[i]--; 104 for (i=0; i<n+1; i++) a->i[i]--; 105 } 106 PetscFunctionReturn(0); 107 } 108 109 #undef __FUNC__ 110 #define __FUNC__ "MatGetBlockSize_SeqBAIJ" 111 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 112 { 113 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 114 115 PetscFunctionBegin; 116 *bs = baij->bs; 117 PetscFunctionReturn(0); 118 } 119 120 121 #undef __FUNC__ 122 #define __FUNC__ "MatDestroy_SeqBAIJ" 123 int MatDestroy_SeqBAIJ(Mat A) 124 { 125 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 126 int ierr; 127 128 PetscFunctionBegin; 129 if (A->mapping) { 130 ierr = ISLocalToGlobalMappingDestroy(A->mapping);CHKERRQ(ierr); 131 } 132 if (A->bmapping) { 133 ierr = ISLocalToGlobalMappingDestroy(A->bmapping);CHKERRQ(ierr); 134 } 135 if (A->rmap) { 136 ierr = MapDestroy(A->rmap);CHKERRQ(ierr); 137 } 138 if (A->cmap) { 139 ierr = MapDestroy(A->cmap);CHKERRQ(ierr); 140 } 141 #if defined(PETSC_USE_LOG) 142 PLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",a->m,a->n,a->nz); 143 #endif 144 ierr = PetscFree(a->a);CHKERRQ(ierr); 145 if (!a->singlemalloc) { 146 ierr = PetscFree(a->i);CHKERRQ(ierr); 147 ierr = PetscFree(a->j);CHKERRQ(ierr); 148 } 149 if (a->row) { 150 ierr = ISDestroy(a->row);CHKERRQ(ierr); 151 } 152 if (a->col) { 153 ierr = ISDestroy(a->col);CHKERRQ(ierr); 154 } 155 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 156 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 157 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 158 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 159 if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 160 if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 161 if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 162 ierr = PetscFree(a);CHKERRQ(ierr); 163 PLogObjectDestroy(A); 164 PetscHeaderDestroy(A); 165 PetscFunctionReturn(0); 166 } 167 168 #undef __FUNC__ 169 #define __FUNC__ "MatSetOption_SeqBAIJ" 170 int MatSetOption_SeqBAIJ(Mat A,MatOption op) 171 { 172 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 173 174 PetscFunctionBegin; 175 if (op == MAT_ROW_ORIENTED) a->roworiented = PETSC_TRUE; 176 else if (op == MAT_COLUMN_ORIENTED) a->roworiented = PETSC_FALSE; 177 else if (op == MAT_COLUMNS_SORTED) a->sorted = PETSC_TRUE; 178 else if (op == MAT_COLUMNS_UNSORTED) a->sorted = PETSC_FALSE; 179 else if (op == MAT_KEEP_ZEROED_ROWS) a->keepzeroedrows = PETSC_TRUE; 180 else if (op == MAT_NO_NEW_NONZERO_LOCATIONS) a->nonew = 1; 181 else if (op == MAT_NEW_NONZERO_LOCATION_ERR) a->nonew = -1; 182 else if (op == MAT_NEW_NONZERO_ALLOCATION_ERR) a->nonew = -2; 183 else if (op == MAT_YES_NEW_NONZERO_LOCATIONS) a->nonew = 0; 184 else if (op == MAT_ROWS_SORTED || 185 op == MAT_ROWS_UNSORTED || 186 op == MAT_SYMMETRIC || 187 op == MAT_STRUCTURALLY_SYMMETRIC || 188 op == MAT_YES_NEW_DIAGONALS || 189 op == MAT_IGNORE_OFF_PROC_ENTRIES || 190 op == MAT_USE_HASH_TABLE) { 191 PLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 192 } else if (op == MAT_NO_NEW_DIAGONALS) { 193 SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 194 } else { 195 SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 196 } 197 PetscFunctionReturn(0); 198 } 199 200 201 #undef __FUNC__ 202 #define __FUNC__ "MatGetSize_SeqBAIJ" 203 int MatGetSize_SeqBAIJ(Mat A,int *m,int *n) 204 { 205 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 206 207 PetscFunctionBegin; 208 if (m) *m = a->m; 209 if (n) *n = a->n; 210 PetscFunctionReturn(0); 211 } 212 213 #undef __FUNC__ 214 #define __FUNC__ "MatGetOwnershipRange_SeqBAIJ" 215 int MatGetOwnershipRange_SeqBAIJ(Mat A,int *m,int *n) 216 { 217 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 218 219 PetscFunctionBegin; 220 *m = 0; *n = a->m; 221 PetscFunctionReturn(0); 222 } 223 224 #undef __FUNC__ 225 #define __FUNC__ "MatGetRow_SeqBAIJ" 226 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 227 { 228 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 229 int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2; 230 MatScalar *aa,*aa_i; 231 Scalar *v_i; 232 233 PetscFunctionBegin; 234 bs = a->bs; 235 ai = a->i; 236 aj = a->j; 237 aa = a->a; 238 bs2 = a->bs2; 239 240 if (row < 0 || row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row out of range"); 241 242 bn = row/bs; /* Block number */ 243 bp = row % bs; /* Block Position */ 244 M = ai[bn+1] - ai[bn]; 245 *nz = bs*M; 246 247 if (v) { 248 *v = 0; 249 if (*nz) { 250 *v = (Scalar*)PetscMalloc((*nz)*sizeof(Scalar));CHKPTRQ(*v); 251 for (i=0; i<M; i++) { /* for each block in the block row */ 252 v_i = *v + i*bs; 253 aa_i = aa + bs2*(ai[bn] + i); 254 for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 255 } 256 } 257 } 258 259 if (idx) { 260 *idx = 0; 261 if (*nz) { 262 *idx = (int*)PetscMalloc((*nz)*sizeof(int));CHKPTRQ(*idx); 263 for (i=0; i<M; i++) { /* for each block in the block row */ 264 idx_i = *idx + i*bs; 265 itmp = bs*aj[ai[bn] + i]; 266 for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 267 } 268 } 269 } 270 PetscFunctionReturn(0); 271 } 272 273 #undef __FUNC__ 274 #define __FUNC__ "MatRestoreRow_SeqBAIJ" 275 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,Scalar **v) 276 { 277 int ierr; 278 279 PetscFunctionBegin; 280 if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 281 if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 282 PetscFunctionReturn(0); 283 } 284 285 #undef __FUNC__ 286 #define __FUNC__ "MatTranspose_SeqBAIJ" 287 int MatTranspose_SeqBAIJ(Mat A,Mat *B) 288 { 289 Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 290 Mat C; 291 int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 292 int *rows,*cols,bs2=a->bs2,refct; 293 MatScalar *array = a->a; 294 295 PetscFunctionBegin; 296 if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Square matrix only for in-place"); 297 col = (int*)PetscMalloc((1+nbs)*sizeof(int));CHKPTRQ(col); 298 ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 299 300 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 301 ierr = MatCreateSeqBAIJ(A->comm,bs,a->n,a->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 302 ierr = PetscFree(col);CHKERRQ(ierr); 303 rows = (int*)PetscMalloc(2*bs*sizeof(int));CHKPTRQ(rows); 304 cols = rows + bs; 305 for (i=0; i<mbs; i++) { 306 cols[0] = i*bs; 307 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 308 len = ai[i+1] - ai[i]; 309 for (j=0; j<len; j++) { 310 rows[0] = (*aj++)*bs; 311 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 312 ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 313 array += bs2; 314 } 315 } 316 ierr = PetscFree(rows);CHKERRQ(ierr); 317 318 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 319 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 320 321 if (B) { 322 *B = C; 323 } else { 324 PetscOps *Abops; 325 MatOps Aops; 326 327 /* This isn't really an in-place transpose */ 328 ierr = PetscFree(a->a);CHKERRQ(ierr); 329 if (!a->singlemalloc) { 330 ierr = PetscFree(a->i);CHKERRQ(ierr); 331 ierr = PetscFree(a->j);CHKERRQ(ierr); 332 } 333 if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 334 if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 335 if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 336 if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 337 ierr = PetscFree(a);CHKERRQ(ierr); 338 339 340 ierr = MapDestroy(A->rmap);CHKERRQ(ierr); 341 ierr = MapDestroy(A->cmap);CHKERRQ(ierr); 342 343 /* 344 This is horrible, horrible code. We need to keep the 345 A pointers for the bops and ops but copy everything 346 else from C. 347 */ 348 Abops = A->bops; 349 Aops = A->ops; 350 refct = A->refct; 351 ierr = PetscMemcpy(A,C,sizeof(struct _p_Mat));CHKERRQ(ierr); 352 A->bops = Abops; 353 A->ops = Aops; 354 A->qlist = 0; 355 A->refct = refct; 356 357 PetscHeaderDestroy(C); 358 } 359 PetscFunctionReturn(0); 360 } 361 362 #undef __FUNC__ 363 #define __FUNC__ "MatView_SeqBAIJ_Binary" 364 static int MatView_SeqBAIJ_Binary(Mat A,Viewer viewer) 365 { 366 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 367 int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 368 Scalar *aa; 369 FILE *file; 370 371 PetscFunctionBegin; 372 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 373 col_lens = (int*)PetscMalloc((4+a->m)*sizeof(int));CHKPTRQ(col_lens); 374 col_lens[0] = MAT_COOKIE; 375 376 col_lens[1] = a->m; 377 col_lens[2] = a->n; 378 col_lens[3] = a->nz*bs2; 379 380 /* store lengths of each row and write (including header) to file */ 381 count = 0; 382 for (i=0; i<a->mbs; i++) { 383 for (j=0; j<bs; j++) { 384 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 385 } 386 } 387 ierr = PetscBinaryWrite(fd,col_lens,4+a->m,PETSC_INT,1);CHKERRQ(ierr); 388 ierr = PetscFree(col_lens);CHKERRQ(ierr); 389 390 /* store column indices (zero start index) */ 391 jj = (int*)PetscMalloc((a->nz+1)*bs2*sizeof(int));CHKPTRQ(jj); 392 count = 0; 393 for (i=0; i<a->mbs; i++) { 394 for (j=0; j<bs; j++) { 395 for (k=a->i[i]; k<a->i[i+1]; k++) { 396 for (l=0; l<bs; l++) { 397 jj[count++] = bs*a->j[k] + l; 398 } 399 } 400 } 401 } 402 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 403 ierr = PetscFree(jj);CHKERRQ(ierr); 404 405 /* store nonzero values */ 406 aa = (Scalar*)PetscMalloc((a->nz+1)*bs2*sizeof(Scalar));CHKPTRQ(aa); 407 count = 0; 408 for (i=0; i<a->mbs; i++) { 409 for (j=0; j<bs; j++) { 410 for (k=a->i[i]; k<a->i[i+1]; k++) { 411 for (l=0; l<bs; l++) { 412 aa[count++] = a->a[bs2*k + l*bs + j]; 413 } 414 } 415 } 416 } 417 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 418 ierr = PetscFree(aa);CHKERRQ(ierr); 419 420 ierr = ViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 421 if (file) { 422 fprintf(file,"-matload_block_size %d\n",a->bs); 423 } 424 PetscFunctionReturn(0); 425 } 426 427 #undef __FUNC__ 428 #define __FUNC__ "MatView_SeqBAIJ_ASCII" 429 static int MatView_SeqBAIJ_ASCII(Mat A,Viewer viewer) 430 { 431 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 432 int ierr,i,j,format,bs = a->bs,k,l,bs2=a->bs2; 433 char *outputname; 434 435 PetscFunctionBegin; 436 ierr = ViewerGetOutputname(viewer,&outputname);CHKERRQ(ierr); 437 ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 438 if (format == VIEWER_FORMAT_ASCII_INFO || format == VIEWER_FORMAT_ASCII_INFO_LONG) { 439 ierr = ViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 440 } else if (format == VIEWER_FORMAT_ASCII_MATLAB) { 441 SETERRQ(PETSC_ERR_SUP,0,"Matlab format not supported"); 442 } else if (format == VIEWER_FORMAT_ASCII_COMMON) { 443 ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 444 for (i=0; i<a->mbs; i++) { 445 for (j=0; j<bs; j++) { 446 ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 447 for (k=a->i[i]; k<a->i[i+1]; k++) { 448 for (l=0; l<bs; l++) { 449 #if defined(PETSC_USE_COMPLEX) 450 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 451 ierr = ViewerASCIIPrintf(viewer," %d %g + %gi",bs*a->j[k]+l, 452 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 453 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 454 ierr = ViewerASCIIPrintf(viewer," %d %g - %gi",bs*a->j[k]+l, 455 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 456 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 457 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 458 } 459 #else 460 if (a->a[bs2*k + l*bs + j] != 0.0) { 461 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 462 } 463 #endif 464 } 465 } 466 ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 467 } 468 } 469 ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 470 } else { 471 ierr = ViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 472 for (i=0; i<a->mbs; i++) { 473 for (j=0; j<bs; j++) { 474 ierr = ViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 475 for (k=a->i[i]; k<a->i[i+1]; k++) { 476 for (l=0; l<bs; l++) { 477 #if defined(PETSC_USE_COMPLEX) 478 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 479 ierr = ViewerASCIIPrintf(viewer," %d %g + %g i",bs*a->j[k]+l, 480 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 481 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 482 ierr = ViewerASCIIPrintf(viewer," %d %g - %g i",bs*a->j[k]+l, 483 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 484 } else { 485 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 486 } 487 #else 488 ierr = ViewerASCIIPrintf(viewer," %d %g ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 489 #endif 490 } 491 } 492 ierr = ViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 493 } 494 } 495 ierr = ViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 496 } 497 ierr = ViewerFlush(viewer);CHKERRQ(ierr); 498 PetscFunctionReturn(0); 499 } 500 501 #undef __FUNC__ 502 #define __FUNC__ "MatView_SeqBAIJ_Draw_Zoom" 503 static int MatView_SeqBAIJ_Draw_Zoom(Draw draw,void *Aa) 504 { 505 Mat A = (Mat) Aa; 506 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 507 int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 508 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 509 MatScalar *aa; 510 Viewer viewer; 511 512 PetscFunctionBegin; 513 514 /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 515 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 516 517 ierr = DrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 518 519 /* loop over matrix elements drawing boxes */ 520 color = DRAW_BLUE; 521 for (i=0,row=0; i<mbs; i++,row+=bs) { 522 for (j=a->i[i]; j<a->i[i+1]; j++) { 523 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 524 x_l = a->j[j]*bs; x_r = x_l + 1.0; 525 aa = a->a + j*bs2; 526 for (k=0; k<bs; k++) { 527 for (l=0; l<bs; l++) { 528 if (PetscRealPart(*aa++) >= 0.) continue; 529 ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 530 } 531 } 532 } 533 } 534 color = DRAW_CYAN; 535 for (i=0,row=0; i<mbs; i++,row+=bs) { 536 for (j=a->i[i]; j<a->i[i+1]; j++) { 537 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 538 x_l = a->j[j]*bs; x_r = x_l + 1.0; 539 aa = a->a + j*bs2; 540 for (k=0; k<bs; k++) { 541 for (l=0; l<bs; l++) { 542 if (PetscRealPart(*aa++) != 0.) continue; 543 ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 544 } 545 } 546 } 547 } 548 549 color = DRAW_RED; 550 for (i=0,row=0; i<mbs; i++,row+=bs) { 551 for (j=a->i[i]; j<a->i[i+1]; j++) { 552 y_l = a->m - row - 1.0; y_r = y_l + 1.0; 553 x_l = a->j[j]*bs; x_r = x_l + 1.0; 554 aa = a->a + j*bs2; 555 for (k=0; k<bs; k++) { 556 for (l=0; l<bs; l++) { 557 if (PetscRealPart(*aa++) <= 0.) continue; 558 ierr = DrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 559 } 560 } 561 } 562 } 563 PetscFunctionReturn(0); 564 } 565 566 #undef __FUNC__ 567 #define __FUNC__ "MatView_SeqBAIJ_Draw" 568 static int MatView_SeqBAIJ_Draw(Mat A,Viewer viewer) 569 { 570 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 571 int ierr; 572 PetscReal xl,yl,xr,yr,w,h; 573 Draw draw; 574 PetscTruth isnull; 575 576 PetscFunctionBegin; 577 578 ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 579 ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 580 581 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 582 xr = a->n; yr = a->m; h = yr/10.0; w = xr/10.0; 583 xr += w; yr += h; xl = -w; yl = -h; 584 ierr = DrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 585 ierr = DrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 586 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 587 PetscFunctionReturn(0); 588 } 589 590 #undef __FUNC__ 591 #define __FUNC__ "MatView_SeqBAIJ" 592 int MatView_SeqBAIJ(Mat A,Viewer viewer) 593 { 594 int ierr; 595 PetscTruth issocket,isascii,isbinary,isdraw; 596 597 PetscFunctionBegin; 598 ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr); 599 ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 600 ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr); 601 ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 602 if (issocket) { 603 SETERRQ(PETSC_ERR_SUP,0,"Socket viewer not supported"); 604 } else if (isascii){ 605 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 606 } else if (isbinary) { 607 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 608 } else if (isdraw) { 609 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 610 } else { 611 SETERRQ1(1,1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 612 } 613 PetscFunctionReturn(0); 614 } 615 616 617 #undef __FUNC__ 618 #define __FUNC__ "MatGetValues_SeqBAIJ" 619 int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v) 620 { 621 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 622 int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 623 int *ai = a->i,*ailen = a->ilen; 624 int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 625 MatScalar *ap,*aa = a->a,zero = 0.0; 626 627 PetscFunctionBegin; 628 for (k=0; k<m; k++) { /* loop over rows */ 629 row = im[k]; brow = row/bs; 630 if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 631 if (row >= a->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 632 rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 633 nrow = ailen[brow]; 634 for (l=0; l<n; l++) { /* loop over columns */ 635 if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 636 if (in[l] >= a->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 637 col = in[l] ; 638 bcol = col/bs; 639 cidx = col%bs; 640 ridx = row%bs; 641 high = nrow; 642 low = 0; /* assume unsorted */ 643 while (high-low > 5) { 644 t = (low+high)/2; 645 if (rp[t] > bcol) high = t; 646 else low = t; 647 } 648 for (i=low; i<high; i++) { 649 if (rp[i] > bcol) break; 650 if (rp[i] == bcol) { 651 *v++ = ap[bs2*i+bs*cidx+ridx]; 652 goto finished; 653 } 654 } 655 *v++ = zero; 656 finished:; 657 } 658 } 659 PetscFunctionReturn(0); 660 } 661 662 663 #undef __FUNC__ 664 #define __FUNC__ "MatSetValuesBlocked_SeqBAIJ" 665 int MatSetValuesBlocked_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 666 { 667 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 668 int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 669 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 670 int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 671 Scalar *value = v; 672 MatScalar *ap,*aa=a->a,*bap; 673 674 PetscFunctionBegin; 675 if (roworiented) { 676 stepval = (n-1)*bs; 677 } else { 678 stepval = (m-1)*bs; 679 } 680 for (k=0; k<m; k++) { /* loop over added rows */ 681 row = im[k]; 682 if (row < 0) continue; 683 #if defined(PETSC_USE_BOPT_g) 684 if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 685 #endif 686 rp = aj + ai[row]; 687 ap = aa + bs2*ai[row]; 688 rmax = imax[row]; 689 nrow = ailen[row]; 690 low = 0; 691 for (l=0; l<n; l++) { /* loop over added columns */ 692 if (in[l] < 0) continue; 693 #if defined(PETSC_USE_BOPT_g) 694 if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 695 #endif 696 col = in[l]; 697 if (roworiented) { 698 value = v + k*(stepval+bs)*bs + l*bs; 699 } else { 700 value = v + l*(stepval+bs)*bs + k*bs; 701 } 702 if (!sorted) low = 0; high = nrow; 703 while (high-low > 7) { 704 t = (low+high)/2; 705 if (rp[t] > col) high = t; 706 else low = t; 707 } 708 for (i=low; i<high; i++) { 709 if (rp[i] > col) break; 710 if (rp[i] == col) { 711 bap = ap + bs2*i; 712 if (roworiented) { 713 if (is == ADD_VALUES) { 714 for (ii=0; ii<bs; ii++,value+=stepval) { 715 for (jj=ii; jj<bs2; jj+=bs) { 716 bap[jj] += *value++; 717 } 718 } 719 } else { 720 for (ii=0; ii<bs; ii++,value+=stepval) { 721 for (jj=ii; jj<bs2; jj+=bs) { 722 bap[jj] = *value++; 723 } 724 } 725 } 726 } else { 727 if (is == ADD_VALUES) { 728 for (ii=0; ii<bs; ii++,value+=stepval) { 729 for (jj=0; jj<bs; jj++) { 730 *bap++ += *value++; 731 } 732 } 733 } else { 734 for (ii=0; ii<bs; ii++,value+=stepval) { 735 for (jj=0; jj<bs; jj++) { 736 *bap++ = *value++; 737 } 738 } 739 } 740 } 741 goto noinsert2; 742 } 743 } 744 if (nonew == 1) goto noinsert2; 745 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 746 if (nrow >= rmax) { 747 /* there is no extra room in row, therefore enlarge */ 748 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 749 MatScalar *new_a; 750 751 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 752 753 /* malloc new storage space */ 754 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 755 new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); 756 new_j = (int*)(new_a + bs2*new_nz); 757 new_i = new_j + new_nz; 758 759 /* copy over old data into new slots */ 760 for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 761 for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 762 ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 763 len = (new_nz - CHUNKSIZE - ai[row] - nrow); 764 ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 765 ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 766 ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 767 ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 768 /* free up old matrix storage */ 769 ierr = PetscFree(a->a);CHKERRQ(ierr); 770 if (!a->singlemalloc) { 771 ierr = PetscFree(a->i);CHKERRQ(ierr); 772 ierr = PetscFree(a->j);CHKERRQ(ierr); 773 } 774 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 775 a->singlemalloc = PETSC_TRUE; 776 777 rp = aj + ai[row]; ap = aa + bs2*ai[row]; 778 rmax = imax[row] = imax[row] + CHUNKSIZE; 779 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 780 a->maxnz += bs2*CHUNKSIZE; 781 a->reallocs++; 782 a->nz++; 783 } 784 N = nrow++ - 1; 785 /* shift up all the later entries in this row */ 786 for (ii=N; ii>=i; ii--) { 787 rp[ii+1] = rp[ii]; 788 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 789 } 790 if (N >= i) { 791 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 792 } 793 rp[i] = col; 794 bap = ap + bs2*i; 795 if (roworiented) { 796 for (ii=0; ii<bs; ii++,value+=stepval) { 797 for (jj=ii; jj<bs2; jj+=bs) { 798 bap[jj] = *value++; 799 } 800 } 801 } else { 802 for (ii=0; ii<bs; ii++,value+=stepval) { 803 for (jj=0; jj<bs; jj++) { 804 *bap++ = *value++; 805 } 806 } 807 } 808 noinsert2:; 809 low = i; 810 } 811 ailen[row] = nrow; 812 } 813 PetscFunctionReturn(0); 814 } 815 816 817 #undef __FUNC__ 818 #define __FUNC__ "MatAssemblyEnd_SeqBAIJ" 819 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 820 { 821 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 822 int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 823 int m = a->m,*ip,N,*ailen = a->ilen; 824 int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 825 MatScalar *aa = a->a,*ap; 826 827 PetscFunctionBegin; 828 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 829 830 if (m) rmax = ailen[0]; 831 for (i=1; i<mbs; i++) { 832 /* move each row back by the amount of empty slots (fshift) before it*/ 833 fshift += imax[i-1] - ailen[i-1]; 834 rmax = PetscMax(rmax,ailen[i]); 835 if (fshift) { 836 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 837 N = ailen[i]; 838 for (j=0; j<N; j++) { 839 ip[j-fshift] = ip[j]; 840 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 841 } 842 } 843 ai[i] = ai[i-1] + ailen[i-1]; 844 } 845 if (mbs) { 846 fshift += imax[mbs-1] - ailen[mbs-1]; 847 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 848 } 849 /* reset ilen and imax for each row */ 850 for (i=0; i<mbs; i++) { 851 ailen[i] = imax[i] = ai[i+1] - ai[i]; 852 } 853 a->nz = ai[mbs]; 854 855 /* diagonals may have moved, so kill the diagonal pointers */ 856 if (fshift && a->diag) { 857 ierr = PetscFree(a->diag);CHKERRQ(ierr); 858 PLogObjectMemory(A,-(m+1)*sizeof(int)); 859 a->diag = 0; 860 } 861 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n", 862 m,a->n,a->bs,fshift*bs2,a->nz*bs2); 863 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n", 864 a->reallocs); 865 PLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 866 a->reallocs = 0; 867 A->info.nz_unneeded = (PetscReal)fshift*bs2; 868 869 PetscFunctionReturn(0); 870 } 871 872 873 874 /* 875 This function returns an array of flags which indicate the locations of contiguous 876 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 877 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 878 Assume: sizes should be long enough to hold all the values. 879 */ 880 #undef __FUNC__ 881 #define __FUNC__ "MatZeroRows_SeqBAIJ_Check_Blocks" 882 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 883 { 884 int i,j,k,row; 885 PetscTruth flg; 886 887 PetscFunctionBegin; 888 for (i=0,j=0; i<n; j++) { 889 row = idx[i]; 890 if (row%bs!=0) { /* Not the begining of a block */ 891 sizes[j] = 1; 892 i++; 893 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 894 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 895 i++; 896 } else { /* Begining of the block, so check if the complete block exists */ 897 flg = PETSC_TRUE; 898 for (k=1; k<bs; k++) { 899 if (row+k != idx[i+k]) { /* break in the block */ 900 flg = PETSC_FALSE; 901 break; 902 } 903 } 904 if (flg == PETSC_TRUE) { /* No break in the bs */ 905 sizes[j] = bs; 906 i+= bs; 907 } else { 908 sizes[j] = 1; 909 i++; 910 } 911 } 912 } 913 *bs_max = j; 914 PetscFunctionReturn(0); 915 } 916 917 #undef __FUNC__ 918 #define __FUNC__ "MatZeroRows_SeqBAIJ" 919 int MatZeroRows_SeqBAIJ(Mat A,IS is,Scalar *diag) 920 { 921 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 922 int ierr,i,j,k,count,is_n,*is_idx,*rows; 923 int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 924 Scalar zero = 0.0; 925 MatScalar *aa; 926 927 PetscFunctionBegin; 928 /* Make a copy of the IS and sort it */ 929 ierr = ISGetSize(is,&is_n);CHKERRQ(ierr); 930 ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 931 932 /* allocate memory for rows,sizes */ 933 rows = (int*)PetscMalloc((3*is_n+1)*sizeof(int));CHKPTRQ(rows); 934 sizes = rows + is_n; 935 936 /* initialize copy IS valurs to rows, and sort them */ 937 for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 938 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 939 if (baij->keepzeroedrows) { 940 for (i=0; i<is_n; i++) { sizes[i] = 1; } 941 bs_max = is_n; 942 } else { 943 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 944 } 945 ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 946 947 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 948 row = rows[j]; 949 if (row < 0 || row > baij->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"row %d out of range",row); 950 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 951 aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 952 if (sizes[i] == bs && !baij->keepzeroedrows) { 953 if (diag) { 954 if (baij->ilen[row/bs] > 0) { 955 baij->ilen[row/bs] = 1; 956 baij->j[baij->i[row/bs]] = row/bs; 957 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 958 } 959 /* Now insert all the diagoanl values for this bs */ 960 for (k=0; k<bs; k++) { 961 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 962 } 963 } else { /* (!diag) */ 964 baij->ilen[row/bs] = 0; 965 } /* end (!diag) */ 966 } else { /* (sizes[i] != bs) */ 967 #if defined (PETSC_USE_DEBUG) 968 if (sizes[i] != 1) SETERRQ(1,0,"Internal Error. Value should be 1"); 969 #endif 970 for (k=0; k<count; k++) { 971 aa[0] = zero; 972 aa+=bs; 973 } 974 if (diag) { 975 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 976 } 977 } 978 } 979 980 ierr = PetscFree(rows);CHKERRQ(ierr); 981 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 982 PetscFunctionReturn(0); 983 } 984 985 #undef __FUNC__ 986 #define __FUNC__ "MatSetValues_SeqBAIJ" 987 int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,Scalar *v,InsertMode is) 988 { 989 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 990 int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 991 int *imax=a->imax,*ai=a->i,*ailen=a->ilen,roworiented=a->roworiented; 992 int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 993 int ridx,cidx,bs2=a->bs2,ierr; 994 MatScalar *ap,value,*aa=a->a,*bap; 995 996 PetscFunctionBegin; 997 for (k=0; k<m; k++) { /* loop over added rows */ 998 row = im[k]; brow = row/bs; 999 if (row < 0) continue; 1000 #if defined(PETSC_USE_BOPT_g) 1001 if (row >= a->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large: row %d max %d",row,a->m); 1002 #endif 1003 rp = aj + ai[brow]; 1004 ap = aa + bs2*ai[brow]; 1005 rmax = imax[brow]; 1006 nrow = ailen[brow]; 1007 low = 0; 1008 for (l=0; l<n; l++) { /* loop over added columns */ 1009 if (in[l] < 0) continue; 1010 #if defined(PETSC_USE_BOPT_g) 1011 if (in[l] >= a->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large: col %d max %d",in[l],a->n); 1012 #endif 1013 col = in[l]; bcol = col/bs; 1014 ridx = row % bs; cidx = col % bs; 1015 if (roworiented) { 1016 value = v[l + k*n]; 1017 } else { 1018 value = v[k + l*m]; 1019 } 1020 if (!sorted) low = 0; high = nrow; 1021 while (high-low > 7) { 1022 t = (low+high)/2; 1023 if (rp[t] > bcol) high = t; 1024 else low = t; 1025 } 1026 for (i=low; i<high; i++) { 1027 if (rp[i] > bcol) break; 1028 if (rp[i] == bcol) { 1029 bap = ap + bs2*i + bs*cidx + ridx; 1030 if (is == ADD_VALUES) *bap += value; 1031 else *bap = value; 1032 goto noinsert1; 1033 } 1034 } 1035 if (nonew == 1) goto noinsert1; 1036 else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 1037 if (nrow >= rmax) { 1038 /* there is no extra room in row, therefore enlarge */ 1039 int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 1040 MatScalar *new_a; 1041 1042 if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); 1043 1044 /* Malloc new storage space */ 1045 len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1046 new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); 1047 new_j = (int*)(new_a + bs2*new_nz); 1048 new_i = new_j + new_nz; 1049 1050 /* copy over old data into new slots */ 1051 for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 1052 for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1053 ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 1054 len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1055 ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1056 ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1057 ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1058 ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 1059 /* free up old matrix storage */ 1060 ierr = PetscFree(a->a);CHKERRQ(ierr); 1061 if (!a->singlemalloc) { 1062 ierr = PetscFree(a->i);CHKERRQ(ierr); 1063 ierr = PetscFree(a->j);CHKERRQ(ierr); 1064 } 1065 aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1066 a->singlemalloc = PETSC_TRUE; 1067 1068 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1069 rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1070 PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 1071 a->maxnz += bs2*CHUNKSIZE; 1072 a->reallocs++; 1073 a->nz++; 1074 } 1075 N = nrow++ - 1; 1076 /* shift up all the later entries in this row */ 1077 for (ii=N; ii>=i; ii--) { 1078 rp[ii+1] = rp[ii]; 1079 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1080 } 1081 if (N>=i) { 1082 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1083 } 1084 rp[i] = bcol; 1085 ap[bs2*i + bs*cidx + ridx] = value; 1086 noinsert1:; 1087 low = i; 1088 } 1089 ailen[brow] = nrow; 1090 } 1091 PetscFunctionReturn(0); 1092 } 1093 1094 extern int MatLUFactorSymbolic_SeqBAIJ(Mat,IS,IS,PetscReal,Mat*); 1095 extern int MatLUFactor_SeqBAIJ(Mat,IS,IS,PetscReal); 1096 extern int MatIncreaseOverlap_SeqBAIJ(Mat,int,IS*,int); 1097 extern int MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,int,MatReuse,Mat*); 1098 extern int MatGetSubMatrices_SeqBAIJ(Mat,int,IS*,IS*,MatReuse,Mat**); 1099 extern int MatMultTranspose_SeqBAIJ(Mat,Vec,Vec); 1100 extern int MatMultTransposeAdd_SeqBAIJ(Mat,Vec,Vec,Vec); 1101 extern int MatScale_SeqBAIJ(Scalar*,Mat); 1102 extern int MatNorm_SeqBAIJ(Mat,NormType,PetscReal *); 1103 extern int MatEqual_SeqBAIJ(Mat,Mat,PetscTruth*); 1104 extern int MatGetDiagonal_SeqBAIJ(Mat,Vec); 1105 extern int MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec); 1106 extern int MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *); 1107 extern int MatZeroEntries_SeqBAIJ(Mat); 1108 1109 extern int MatSolve_SeqBAIJ_N(Mat,Vec,Vec); 1110 extern int MatSolve_SeqBAIJ_1(Mat,Vec,Vec); 1111 extern int MatSolve_SeqBAIJ_2(Mat,Vec,Vec); 1112 extern int MatSolve_SeqBAIJ_3(Mat,Vec,Vec); 1113 extern int MatSolve_SeqBAIJ_4(Mat,Vec,Vec); 1114 extern int MatSolve_SeqBAIJ_5(Mat,Vec,Vec); 1115 extern int MatSolve_SeqBAIJ_6(Mat,Vec,Vec); 1116 extern int MatSolve_SeqBAIJ_7(Mat,Vec,Vec); 1117 extern int MatSolveTranspose_SeqBAIJ_7(Mat,Vec,Vec); 1118 extern int MatSolveTranspose_SeqBAIJ_6(Mat,Vec,Vec); 1119 extern int MatSolveTranspose_SeqBAIJ_5(Mat,Vec,Vec); 1120 extern int MatSolveTranspose_SeqBAIJ_4(Mat,Vec,Vec); 1121 extern int MatSolveTranspose_SeqBAIJ_3(Mat,Vec,Vec); 1122 extern int MatSolveTranspose_SeqBAIJ_2(Mat,Vec,Vec); 1123 extern int MatSolveTranspose_SeqBAIJ_1(Mat,Vec,Vec); 1124 1125 extern int MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat*); 1126 extern int MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat*); 1127 extern int MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat*); 1128 extern int MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat*); 1129 extern int MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat*); 1130 extern int MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat*); 1131 extern int MatLUFactorNumeric_SeqBAIJ_6(Mat,Mat*); 1132 1133 extern int MatMult_SeqBAIJ_1(Mat,Vec,Vec); 1134 extern int MatMult_SeqBAIJ_2(Mat,Vec,Vec); 1135 extern int MatMult_SeqBAIJ_3(Mat,Vec,Vec); 1136 extern int MatMult_SeqBAIJ_4(Mat,Vec,Vec); 1137 extern int MatMult_SeqBAIJ_5(Mat,Vec,Vec); 1138 extern int MatMult_SeqBAIJ_6(Mat,Vec,Vec); 1139 extern int MatMult_SeqBAIJ_7(Mat,Vec,Vec); 1140 extern int MatMult_SeqBAIJ_N(Mat,Vec,Vec); 1141 1142 extern int MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec); 1143 extern int MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec); 1144 extern int MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec); 1145 extern int MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec); 1146 extern int MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec); 1147 extern int MatMultAdd_SeqBAIJ_6(Mat,Vec,Vec,Vec); 1148 extern int MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec); 1149 extern int MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec); 1150 1151 #undef __FUNC__ 1152 #define __FUNC__ "MatILUFactor_SeqBAIJ" 1153 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info) 1154 { 1155 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 1156 Mat outA; 1157 int ierr; 1158 PetscTruth row_identity,col_identity; 1159 1160 PetscFunctionBegin; 1161 if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,0,"Only levels = 0 supported for in-place ILU"); 1162 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1163 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1164 if (!row_identity || !col_identity) { 1165 SETERRQ(1,1,"Row and column permutations must be identity for in-place ILU"); 1166 } 1167 1168 outA = inA; 1169 inA->factor = FACTOR_LU; 1170 1171 if (!a->diag) { 1172 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 1173 } 1174 /* 1175 Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1176 for ILU(0) factorization with natural ordering 1177 */ 1178 switch (a->bs) { 1179 case 1: 1180 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2_NaturalOrdering; 1181 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering solvetrans BS=1\n"); 1182 case 2: 1183 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering; 1184 inA->ops->solve = MatSolve_SeqBAIJ_2_NaturalOrdering; 1185 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2_NaturalOrdering; 1186 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=2\n"); 1187 break; 1188 case 3: 1189 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering; 1190 inA->ops->solve = MatSolve_SeqBAIJ_3_NaturalOrdering; 1191 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_3_NaturalOrdering; 1192 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=3\n"); 1193 break; 1194 case 4: 1195 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 1196 inA->ops->solve = MatSolve_SeqBAIJ_4_NaturalOrdering; 1197 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_4_NaturalOrdering; 1198 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=4\n"); 1199 break; 1200 case 5: 1201 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering; 1202 inA->ops->solve = MatSolve_SeqBAIJ_5_NaturalOrdering; 1203 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_5_NaturalOrdering; 1204 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=5\n"); 1205 break; 1206 case 6: 1207 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering; 1208 inA->ops->solve = MatSolve_SeqBAIJ_6_NaturalOrdering; 1209 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_6_NaturalOrdering; 1210 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=6\n"); 1211 break; 1212 case 7: 1213 inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering; 1214 inA->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_7_NaturalOrdering; 1215 inA->ops->solve = MatSolve_SeqBAIJ_7_NaturalOrdering; 1216 PLogInfo(inA,"MatILUFactor_SeqBAIJ:Using special in-place natural ordering factor and solve BS=7\n"); 1217 break; 1218 default: 1219 a->row = row; 1220 a->col = col; 1221 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1222 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1223 1224 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 1225 ierr = ISInvertPermutation(col,&(a->icol));CHKERRQ(ierr); 1226 PLogObjectParent(inA,a->icol); 1227 1228 if (!a->solve_work) { 1229 a->solve_work = (Scalar*)PetscMalloc((a->m+a->bs)*sizeof(Scalar));CHKPTRQ(a->solve_work); 1230 PLogObjectMemory(inA,(a->m+a->bs)*sizeof(Scalar)); 1231 } 1232 } 1233 1234 ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1235 1236 PetscFunctionReturn(0); 1237 } 1238 #undef __FUNC__ 1239 #define __FUNC__ "MatPrintHelp_SeqBAIJ" 1240 int MatPrintHelp_SeqBAIJ(Mat A) 1241 { 1242 static PetscTruth called = PETSC_FALSE; 1243 MPI_Comm comm = A->comm; 1244 int ierr; 1245 1246 PetscFunctionBegin; 1247 if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1248 ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1249 ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 1250 PetscFunctionReturn(0); 1251 } 1252 1253 EXTERN_C_BEGIN 1254 #undef __FUNC__ 1255 #define __FUNC__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 1256 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 1257 { 1258 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 1259 int i,nz,n; 1260 1261 PetscFunctionBegin; 1262 nz = baij->maxnz; 1263 n = baij->n; 1264 for (i=0; i<nz; i++) { 1265 baij->j[i] = indices[i]; 1266 } 1267 baij->nz = nz; 1268 for (i=0; i<n; i++) { 1269 baij->ilen[i] = baij->imax[i]; 1270 } 1271 1272 PetscFunctionReturn(0); 1273 } 1274 EXTERN_C_END 1275 1276 #undef __FUNC__ 1277 #define __FUNC__ "MatSeqBAIJSetColumnIndices" 1278 /*@ 1279 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 1280 in the matrix. 1281 1282 Input Parameters: 1283 + mat - the SeqBAIJ matrix 1284 - indices - the column indices 1285 1286 Level: advanced 1287 1288 Notes: 1289 This can be called if you have precomputed the nonzero structure of the 1290 matrix and want to provide it to the matrix object to improve the performance 1291 of the MatSetValues() operation. 1292 1293 You MUST have set the correct numbers of nonzeros per row in the call to 1294 MatCreateSeqBAIJ(). 1295 1296 MUST be called before any calls to MatSetValues(); 1297 1298 @*/ 1299 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 1300 { 1301 int ierr,(*f)(Mat,int *); 1302 1303 PetscFunctionBegin; 1304 PetscValidHeaderSpecific(mat,MAT_COOKIE); 1305 ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void **)&f);CHKERRQ(ierr); 1306 if (f) { 1307 ierr = (*f)(mat,indices);CHKERRQ(ierr); 1308 } else { 1309 SETERRQ(1,1,"Wrong type of matrix to set column indices"); 1310 } 1311 PetscFunctionReturn(0); 1312 } 1313 1314 /* -------------------------------------------------------------------*/ 1315 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1316 MatGetRow_SeqBAIJ, 1317 MatRestoreRow_SeqBAIJ, 1318 MatMult_SeqBAIJ_N, 1319 MatMultAdd_SeqBAIJ_N, 1320 MatMultTranspose_SeqBAIJ, 1321 MatMultTransposeAdd_SeqBAIJ, 1322 MatSolve_SeqBAIJ_N, 1323 0, 1324 0, 1325 0, 1326 MatLUFactor_SeqBAIJ, 1327 0, 1328 0, 1329 MatTranspose_SeqBAIJ, 1330 MatGetInfo_SeqBAIJ, 1331 MatEqual_SeqBAIJ, 1332 MatGetDiagonal_SeqBAIJ, 1333 MatDiagonalScale_SeqBAIJ, 1334 MatNorm_SeqBAIJ, 1335 0, 1336 MatAssemblyEnd_SeqBAIJ, 1337 0, 1338 MatSetOption_SeqBAIJ, 1339 MatZeroEntries_SeqBAIJ, 1340 MatZeroRows_SeqBAIJ, 1341 MatLUFactorSymbolic_SeqBAIJ, 1342 MatLUFactorNumeric_SeqBAIJ_N, 1343 0, 1344 0, 1345 MatGetSize_SeqBAIJ, 1346 MatGetSize_SeqBAIJ, 1347 MatGetOwnershipRange_SeqBAIJ, 1348 MatILUFactorSymbolic_SeqBAIJ, 1349 0, 1350 0, 1351 0, 1352 MatDuplicate_SeqBAIJ, 1353 0, 1354 0, 1355 MatILUFactor_SeqBAIJ, 1356 0, 1357 0, 1358 MatGetSubMatrices_SeqBAIJ, 1359 MatIncreaseOverlap_SeqBAIJ, 1360 MatGetValues_SeqBAIJ, 1361 0, 1362 MatPrintHelp_SeqBAIJ, 1363 MatScale_SeqBAIJ, 1364 0, 1365 0, 1366 0, 1367 MatGetBlockSize_SeqBAIJ, 1368 MatGetRowIJ_SeqBAIJ, 1369 MatRestoreRowIJ_SeqBAIJ, 1370 0, 1371 0, 1372 0, 1373 0, 1374 0, 1375 0, 1376 MatSetValuesBlocked_SeqBAIJ, 1377 MatGetSubMatrix_SeqBAIJ, 1378 0, 1379 0, 1380 MatGetMaps_Petsc}; 1381 1382 EXTERN_C_BEGIN 1383 #undef __FUNC__ 1384 #define __FUNC__ "MatStoreValues_SeqBAIJ" 1385 int MatStoreValues_SeqBAIJ(Mat mat) 1386 { 1387 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1388 int nz = aij->i[aij->m]*aij->bs*aij->bs2; 1389 int ierr; 1390 1391 PetscFunctionBegin; 1392 if (aij->nonew != 1) { 1393 SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1394 } 1395 1396 /* allocate space for values if not already there */ 1397 if (!aij->saved_values) { 1398 aij->saved_values = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(aij->saved_values); 1399 } 1400 1401 /* copy values over */ 1402 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(Scalar));CHKERRQ(ierr); 1403 PetscFunctionReturn(0); 1404 } 1405 EXTERN_C_END 1406 1407 EXTERN_C_BEGIN 1408 #undef __FUNC__ 1409 #define __FUNC__ "MatRetrieveValues_SeqBAIJ" 1410 int MatRetrieveValues_SeqBAIJ(Mat mat) 1411 { 1412 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1413 int nz = aij->i[aij->m]*aij->bs*aij->bs2,ierr; 1414 1415 PetscFunctionBegin; 1416 if (aij->nonew != 1) { 1417 SETERRQ(1,1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 1418 } 1419 if (!aij->saved_values) { 1420 SETERRQ(1,1,"Must call MatStoreValues(A);first"); 1421 } 1422 1423 /* copy values over */ 1424 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(Scalar));CHKERRQ(ierr); 1425 PetscFunctionReturn(0); 1426 } 1427 EXTERN_C_END 1428 1429 #undef __FUNC__ 1430 #define __FUNC__ "MatCreateSeqBAIJ" 1431 /*@C 1432 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1433 compressed row) format. For good matrix assembly performance the 1434 user should preallocate the matrix storage by setting the parameter nz 1435 (or the array nnz). By setting these parameters accurately, performance 1436 during matrix assembly can be increased by more than a factor of 50. 1437 1438 Collective on MPI_Comm 1439 1440 Input Parameters: 1441 + comm - MPI communicator, set to PETSC_COMM_SELF 1442 . bs - size of block 1443 . m - number of rows 1444 . n - number of columns 1445 . nz - number of block nonzeros per block row (same for all rows) 1446 - nnz - array containing the number of block nonzeros in the various block rows 1447 (possibly different for each block row) or PETSC_NULL 1448 1449 Output Parameter: 1450 . A - the matrix 1451 1452 Options Database Keys: 1453 . -mat_no_unroll - uses code that does not unroll the loops in the 1454 block calculations (much slower) 1455 . -mat_block_size - size of the blocks to use 1456 1457 Level: intermediate 1458 1459 Notes: 1460 The block AIJ format is fully compatible with standard Fortran 77 1461 storage. That is, the stored row and column indices can begin at 1462 either one (as in Fortran) or zero. See the users' manual for details. 1463 1464 Specify the preallocated storage with either nz or nnz (not both). 1465 Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1466 allocation. For additional details, see the users manual chapter on 1467 matrices. 1468 1469 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1470 @*/ 1471 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1472 { 1473 Mat B; 1474 Mat_SeqBAIJ *b; 1475 int i,len,ierr,mbs,nbs,bs2,size; 1476 PetscTruth flg; 1477 1478 PetscFunctionBegin; 1479 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1480 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"Comm must be of size 1"); 1481 1482 ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1483 mbs = m/bs; 1484 nbs = n/bs; 1485 bs2 = bs*bs; 1486 1487 if (mbs*bs!=m || nbs*bs!=n) { 1488 SETERRQ(PETSC_ERR_ARG_SIZ,0,"Number rows, cols must be divisible by blocksize"); 1489 } 1490 1491 if (nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"nz cannot be less than -2: value %d",nz); 1492 if (nnz) { 1493 for (i=0; i<mbs; i++) { 1494 if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 1495 } 1496 } 1497 1498 *A = 0; 1499 PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",comm,MatDestroy,MatView); 1500 PLogObjectCreate(B); 1501 B->data = (void*)(b = PetscNew(Mat_SeqBAIJ));CHKPTRQ(b); 1502 ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1503 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 1504 ierr = OptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 1505 if (!flg) { 1506 switch (bs) { 1507 case 1: 1508 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1509 B->ops->solve = MatSolve_SeqBAIJ_1; 1510 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_1; 1511 B->ops->mult = MatMult_SeqBAIJ_1; 1512 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 1513 break; 1514 case 2: 1515 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 1516 B->ops->solve = MatSolve_SeqBAIJ_2; 1517 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_2; 1518 B->ops->mult = MatMult_SeqBAIJ_2; 1519 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 1520 break; 1521 case 3: 1522 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 1523 B->ops->solve = MatSolve_SeqBAIJ_3; 1524 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_3; 1525 B->ops->mult = MatMult_SeqBAIJ_3; 1526 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 1527 break; 1528 case 4: 1529 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 1530 B->ops->solve = MatSolve_SeqBAIJ_4; 1531 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_4; 1532 B->ops->mult = MatMult_SeqBAIJ_4; 1533 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 1534 break; 1535 case 5: 1536 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 1537 B->ops->solve = MatSolve_SeqBAIJ_5; 1538 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_5; 1539 B->ops->mult = MatMult_SeqBAIJ_5; 1540 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 1541 break; 1542 case 6: 1543 B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 1544 B->ops->solve = MatSolve_SeqBAIJ_6; 1545 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_6; 1546 B->ops->mult = MatMult_SeqBAIJ_6; 1547 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 1548 break; 1549 case 7: 1550 B->ops->mult = MatMult_SeqBAIJ_7; 1551 B->ops->solve = MatSolve_SeqBAIJ_7; 1552 B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_7; 1553 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 1554 break; 1555 } 1556 } 1557 B->ops->destroy = MatDestroy_SeqBAIJ; 1558 B->ops->view = MatView_SeqBAIJ; 1559 B->factor = 0; 1560 B->lupivotthreshold = 1.0; 1561 B->mapping = 0; 1562 b->row = 0; 1563 b->col = 0; 1564 b->icol = 0; 1565 b->reallocs = 0; 1566 b->saved_values = 0; 1567 1568 b->m = m; B->m = m; B->M = m; 1569 b->n = n; B->n = n; B->N = n; 1570 1571 ierr = MapCreateMPI(comm,m,m,&B->rmap);CHKERRQ(ierr); 1572 ierr = MapCreateMPI(comm,n,n,&B->cmap);CHKERRQ(ierr); 1573 1574 b->mbs = mbs; 1575 b->nbs = nbs; 1576 b->imax = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(b->imax); 1577 if (!nnz) { 1578 if (nz == PETSC_DEFAULT) nz = 5; 1579 else if (nz <= 0) nz = 1; 1580 for (i=0; i<mbs; i++) b->imax[i] = nz; 1581 nz = nz*mbs; 1582 } else { 1583 nz = 0; 1584 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 1585 } 1586 1587 /* allocate the matrix space */ 1588 len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (b->m+1)*sizeof(int); 1589 b->a = (MatScalar*)PetscMalloc(len);CHKPTRQ(b->a); 1590 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1591 b->j = (int*)(b->a + nz*bs2); 1592 ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 1593 b->i = b->j + nz; 1594 b->singlemalloc = PETSC_TRUE; 1595 1596 b->i[0] = 0; 1597 for (i=1; i<mbs+1; i++) { 1598 b->i[i] = b->i[i-1] + b->imax[i-1]; 1599 } 1600 1601 /* b->ilen will count nonzeros in each block row so far. */ 1602 b->ilen = (int*)PetscMalloc((mbs+1)*sizeof(int)); 1603 PLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1604 for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 1605 1606 b->bs = bs; 1607 b->bs2 = bs2; 1608 b->mbs = mbs; 1609 b->nz = 0; 1610 b->maxnz = nz*bs2; 1611 b->sorted = PETSC_FALSE; 1612 b->roworiented = PETSC_TRUE; 1613 b->nonew = 0; 1614 b->diag = 0; 1615 b->solve_work = 0; 1616 b->mult_work = 0; 1617 b->spptr = 0; 1618 B->info.nz_unneeded = (PetscReal)b->maxnz; 1619 b->keepzeroedrows = PETSC_FALSE; 1620 1621 *A = B; 1622 ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 1623 if (flg) {ierr = MatPrintHelp(B);CHKERRQ(ierr); } 1624 1625 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 1626 "MatStoreValues_SeqBAIJ", 1627 (void*)MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1628 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 1629 "MatRetrieveValues_SeqBAIJ", 1630 (void*)MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1631 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 1632 "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1633 (void*)MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1634 PetscFunctionReturn(0); 1635 } 1636 1637 #undef __FUNC__ 1638 #define __FUNC__ "MatDuplicate_SeqBAIJ" 1639 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 1640 { 1641 Mat C; 1642 Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 1643 int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1644 1645 PetscFunctionBegin; 1646 if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,0,"Corrupt matrix"); 1647 1648 *B = 0; 1649 PetscHeaderCreate(C,_p_Mat,struct _MatOps,MAT_COOKIE,MATSEQBAIJ,"Mat",A->comm,MatDestroy,MatView); 1650 PLogObjectCreate(C); 1651 C->data = (void*)(c = PetscNew(Mat_SeqBAIJ));CHKPTRQ(c); 1652 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1653 C->ops->destroy = MatDestroy_SeqBAIJ; 1654 C->ops->view = MatView_SeqBAIJ; 1655 C->factor = A->factor; 1656 c->row = 0; 1657 c->col = 0; 1658 c->icol = 0; 1659 c->saved_values = 0; 1660 c->keepzeroedrows = a->keepzeroedrows; 1661 C->assembled = PETSC_TRUE; 1662 1663 c->m = C->m = a->m; 1664 c->n = C->n = a->n; 1665 C->M = a->m; 1666 C->N = a->n; 1667 1668 c->bs = a->bs; 1669 c->bs2 = a->bs2; 1670 c->mbs = a->mbs; 1671 c->nbs = a->nbs; 1672 1673 c->imax = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->imax); 1674 c->ilen = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->ilen); 1675 for (i=0; i<mbs; i++) { 1676 c->imax[i] = a->imax[i]; 1677 c->ilen[i] = a->ilen[i]; 1678 } 1679 1680 /* allocate the matrix space */ 1681 c->singlemalloc = PETSC_TRUE; 1682 len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1683 c->a = (MatScalar*)PetscMalloc(len);CHKPTRQ(c->a); 1684 c->j = (int*)(c->a + nz*bs2); 1685 c->i = c->j + nz; 1686 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1687 if (mbs > 0) { 1688 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 1689 if (cpvalues == MAT_COPY_VALUES) { 1690 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1691 } else { 1692 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 1693 } 1694 } 1695 1696 PLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 1697 c->sorted = a->sorted; 1698 c->roworiented = a->roworiented; 1699 c->nonew = a->nonew; 1700 1701 if (a->diag) { 1702 c->diag = (int*)PetscMalloc((mbs+1)*sizeof(int));CHKPTRQ(c->diag); 1703 PLogObjectMemory(C,(mbs+1)*sizeof(int)); 1704 for (i=0; i<mbs; i++) { 1705 c->diag[i] = a->diag[i]; 1706 } 1707 } else c->diag = 0; 1708 c->nz = a->nz; 1709 c->maxnz = a->maxnz; 1710 c->solve_work = 0; 1711 c->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 1712 c->mult_work = 0; 1713 *B = C; 1714 ierr = FListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 1715 PetscFunctionReturn(0); 1716 } 1717 1718 #undef __FUNC__ 1719 #define __FUNC__ "MatLoad_SeqBAIJ" 1720 int MatLoad_SeqBAIJ(Viewer viewer,MatType type,Mat *A) 1721 { 1722 Mat_SeqBAIJ *a; 1723 Mat B; 1724 int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1725 int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 1726 int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1727 int *masked,nmask,tmp,bs2,ishift; 1728 Scalar *aa; 1729 MPI_Comm comm = ((PetscObject)viewer)->comm; 1730 1731 PetscFunctionBegin; 1732 ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1733 bs2 = bs*bs; 1734 1735 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1736 if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,0,"view must have one processor"); 1737 ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1738 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1739 if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not Mat object"); 1740 M = header[1]; N = header[2]; nz = header[3]; 1741 1742 if (header[3] < 0) { 1743 SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as SeqBAIJ"); 1744 } 1745 1746 if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 1747 1748 /* 1749 This code adds extra rows to make sure the number of rows is 1750 divisible by the blocksize 1751 */ 1752 mbs = M/bs; 1753 extra_rows = bs - M + bs*(mbs); 1754 if (extra_rows == bs) extra_rows = 0; 1755 else mbs++; 1756 if (extra_rows) { 1757 PLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 1758 } 1759 1760 /* read in row lengths */ 1761 rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 1762 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 1763 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 1764 1765 /* read in column indices */ 1766 jj = (int*)PetscMalloc((nz+extra_rows)*sizeof(int));CHKPTRQ(jj); 1767 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 1768 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1769 1770 /* loop over row lengths determining block row lengths */ 1771 browlengths = (int*)PetscMalloc(mbs*sizeof(int));CHKPTRQ(browlengths); 1772 ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1773 mask = (int*)PetscMalloc(2*mbs*sizeof(int));CHKPTRQ(mask); 1774 ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 1775 masked = mask + mbs; 1776 rowcount = 0; nzcount = 0; 1777 for (i=0; i<mbs; i++) { 1778 nmask = 0; 1779 for (j=0; j<bs; j++) { 1780 kmax = rowlengths[rowcount]; 1781 for (k=0; k<kmax; k++) { 1782 tmp = jj[nzcount++]/bs; 1783 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1784 } 1785 rowcount++; 1786 } 1787 browlengths[i] += nmask; 1788 /* zero out the mask elements we set */ 1789 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1790 } 1791 1792 /* create our matrix */ 1793 ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 1794 B = *A; 1795 a = (Mat_SeqBAIJ*)B->data; 1796 1797 /* set matrix "i" values */ 1798 a->i[0] = 0; 1799 for (i=1; i<= mbs; i++) { 1800 a->i[i] = a->i[i-1] + browlengths[i-1]; 1801 a->ilen[i-1] = browlengths[i-1]; 1802 } 1803 a->nz = 0; 1804 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 1805 1806 /* read in nonzero values */ 1807 aa = (Scalar*)PetscMalloc((nz+extra_rows)*sizeof(Scalar));CHKPTRQ(aa); 1808 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 1809 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1810 1811 /* set "a" and "j" values into matrix */ 1812 nzcount = 0; jcount = 0; 1813 for (i=0; i<mbs; i++) { 1814 nzcountb = nzcount; 1815 nmask = 0; 1816 for (j=0; j<bs; j++) { 1817 kmax = rowlengths[i*bs+j]; 1818 for (k=0; k<kmax; k++) { 1819 tmp = jj[nzcount++]/bs; 1820 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1821 } 1822 rowcount++; 1823 } 1824 /* sort the masked values */ 1825 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1826 1827 /* set "j" values into matrix */ 1828 maskcount = 1; 1829 for (j=0; j<nmask; j++) { 1830 a->j[jcount++] = masked[j]; 1831 mask[masked[j]] = maskcount++; 1832 } 1833 /* set "a" values into matrix */ 1834 ishift = bs2*a->i[i]; 1835 for (j=0; j<bs; j++) { 1836 kmax = rowlengths[i*bs+j]; 1837 for (k=0; k<kmax; k++) { 1838 tmp = jj[nzcountb]/bs ; 1839 block = mask[tmp] - 1; 1840 point = jj[nzcountb] - bs*tmp; 1841 idx = ishift + bs2*block + j + bs*point; 1842 a->a[idx] = aa[nzcountb++]; 1843 } 1844 } 1845 /* zero out the mask elements we set */ 1846 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1847 } 1848 if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"Bad binary matrix"); 1849 1850 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1851 ierr = PetscFree(browlengths);CHKERRQ(ierr); 1852 ierr = PetscFree(aa);CHKERRQ(ierr); 1853 ierr = PetscFree(jj);CHKERRQ(ierr); 1854 ierr = PetscFree(mask);CHKERRQ(ierr); 1855 1856 B->assembled = PETSC_TRUE; 1857 1858 ierr = MatView_Private(B);CHKERRQ(ierr); 1859 PetscFunctionReturn(0); 1860 } 1861 1862 1863 1864