Actual source code: memory.cxx

  1: #include <petsc/private/deviceimpl.h>

  3: #include <petsc/private/cpp/register_finalize.hpp>
  4: #include <petsc/private/cpp/type_traits.hpp>
  5: #include <petsc/private/cpp/unordered_map.hpp>

  7: #include <algorithm> // std::find_if
  8: #include <cstring>   // std::memset

 10: #include <petsc/private/cpp/object_pool.hpp>

 12: namespace Petsc
 13: {

 15: namespace memory
 16: {

 18: typename PoolAllocated::allocator_type PoolAllocated::pool_{};

 20: } // namespace memory

 22: } // namespace Petsc

 24: const char *const PetscDeviceCopyModes[] = {"host_to_host", "device_to_host", "host_to_device", "device_to_device", "auto", "PetscDeviceCopyMode", "PETSC_DEVICE_COPY_", nullptr};
 25: static_assert(Petsc::util::to_underlying(PETSC_DEVICE_COPY_HTOH) == 0, "");
 26: static_assert(Petsc::util::to_underlying(PETSC_DEVICE_COPY_DTOH) == 1, "");
 27: static_assert(Petsc::util::to_underlying(PETSC_DEVICE_COPY_HTOD) == 2, "");
 28: static_assert(Petsc::util::to_underlying(PETSC_DEVICE_COPY_DTOD) == 3, "");
 29: static_assert(Petsc::util::to_underlying(PETSC_DEVICE_COPY_AUTO) == 4, "");

 31: // GCC implementation for std::hash<T*>. LLVM's libc++ is almost 2x slower because they do all
 32: // kinds of complicated murmur hashing, so we make sure to enforce GCC's version.
 33: struct PointerHash {
 34:   template <typename T>
 35:   PETSC_NODISCARD std::size_t operator()(const T *ptr) const noexcept
 36:   {
 37:     return reinterpret_cast<std::size_t>(ptr);
 38:   }
 39: };

 41: // ==========================================================================================
 42: // PointerAttributes
 43: //
 44: // A set of attributes for a pointer
 45: // ==========================================================================================

 47: struct PointerAttributes {
 48:   PetscMemType  mtype = PETSC_MEMTYPE_HOST; // memtype of allocation
 49:   PetscObjectId id    = 0;                  // id of allocation
 50:   std::size_t   size  = 0;                  // size of allocation (bytes)

 52:   // even though this is a POD and can be aggregate initialized, the STL uses () constructors
 53:   // in unordered_map and so we need to provide a trivial constructor...
 54:   constexpr PointerAttributes() = default;
 55:   constexpr PointerAttributes(PetscMemType, PetscObjectId, std::size_t) noexcept;

 57:   bool operator==(const PointerAttributes &) const noexcept;

 59:   PETSC_NODISCARD bool contains(const void *, const void *) const noexcept;
 60: };

 62: // ==========================================================================================
 63: // PointerAttributes - Public API
 64: // ==========================================================================================

 66: inline constexpr PointerAttributes::PointerAttributes(PetscMemType mtype_, PetscObjectId id_, std::size_t size_) noexcept : mtype(mtype_), id(id_), size(size_) { }

 68: inline bool PointerAttributes::operator==(const PointerAttributes &other) const noexcept
 69: {
 70:   return (mtype == other.mtype) && (id == other.id) && (size == other.size);
 71: }

 73: /*
 74:   PointerAttributes::contains - asks and answers the question, does ptr_begin contain ptr

 76:   Input Parameters:
 77: + ptr_begin - pointer to the start of the range to check
 78: - ptr       - the pointer to query

 80:   Notes:
 81:   Returns true if ptr falls within ptr_begins range, false otherwise.
 82: */
 83: inline bool PointerAttributes::contains(const void *ptr_begin, const void *ptr) const noexcept
 84: {
 85:   return (ptr >= ptr_begin) && (ptr < (static_cast<const char *>(ptr_begin) + size));
 86: }

 88: // ==========================================================================================
 89: // MemoryMap
 90: //
 91: // Since the pointers allocated via PetscDeviceAllocate_Private() may be device pointers we
 92: // cannot just store meta-data within the pointer itself (as we can't dereference them). So
 93: // instead we need to keep an extra map to keep track of them
 94: //
 95: // Each entry maps pointer -> {
 96: //   PetscMemType  - The memtype of the pointer
 97: //   PetscObjectId - A unique ID assigned at allocation or registration so auto-dep can
 98: //                   identify the pointer
 99: //   size          - The size (in bytes) of the allocation
100: // }
101: // ==========================================================================================

103: class MemoryMap : public Petsc::RegisterFinalizeable<MemoryMap> {
104: public:
105:   using map_type = Petsc::UnorderedMap<void *, PointerAttributes, PointerHash>;

107:   map_type map{};

109:   PETSC_NODISCARD map_type::const_iterator search_for(const void *, bool = false) const noexcept;

111: private:
112:   friend class Petsc::RegisterFinalizeable<MemoryMap>;
113:   PetscErrorCode register_finalize_() noexcept;
114:   PetscErrorCode finalize_() noexcept;
115: };

117: // ==========================================================================================
118: // MemoryMap - Private API
119: // ==========================================================================================

121: PetscErrorCode MemoryMap::register_finalize_() noexcept
122: {
123:   PetscFunctionBegin;
124:   // Preallocate, this does give a modest performance bump since unordered_map is so __dog__
125:   // slow if it needs to rehash. Experiments show that users tend not to have more than 5 or
126:   // so concurrently live pointers lying around. 10 at most.
127:   PetscCall(map.reserve(16));
128:   PetscFunctionReturn(PETSC_SUCCESS);
129: }

131: PetscErrorCode MemoryMap::finalize_() noexcept
132: {
133:   PetscFunctionBegin;
134:   PetscCall(PetscInfo(nullptr, "Finalizing memory map\n"));
135:   PetscCallCXX(map = map_type{});
136:   PetscFunctionReturn(PETSC_SUCCESS);
137: }

139: // ==========================================================================================
140: // MemoryMap - Public API
141: // ==========================================================================================

143: /*
144:   MemoryMap::search_for - retrieve an iterator to the key-value pair for a pointer in the map

146:   Input Parameters:
147: + ptr       - pointer to search for
148: - must_find - true if an error is raised if the pointer is not found (default: false)

150:   Notes:
151:   Accounts for sub-regions, i.e. if ptr is contained within another pointers region, it returns
152:   the iterator to the super-pointers key-value pair.

154:   If ptr is not found and must_find is false returns map.end(), otherwise raises an error
155: */
156: MemoryMap::map_type::const_iterator MemoryMap::search_for(const void *ptr, bool must_find) const noexcept
157: {
158:   const auto end_it = map.end();
159:   auto       it     = map.find(const_cast<map_type::key_type>(ptr));

161:   // ptr was found, and points to an entire block
162:   PetscFunctionBegin;
163:   if (it != end_it) PetscFunctionReturn(it);
164:   // wasn't found, but maybe its part of a block. have to search every block for it
165:   // clang-format off
166:   it = std::find_if(map.begin(), end_it, [ptr](map_type::const_iterator::reference map_it) {
167:     return map_it.second.contains(map_it.first, ptr);
168:   });
169:   // clang-format on
170:   PetscCheckAbort(!must_find || it != end_it, PETSC_COMM_SELF, PETSC_ERR_POINTER, "Pointer %p was not registered with the memory tracker, call PetscDeviceRegisterMemory() on it", ptr);
171:   PetscFunctionReturn(it);
172: }

174: static MemoryMap memory_map;

176: // ==========================================================================================
177: // Utility functions
178: // ==========================================================================================

180: static PetscErrorCode PetscDeviceCheckCapable_Private(PetscDeviceContext dctx, bool cond, const char descr[])
181: {
182:   PetscFunctionBegin;
183:   PetscCheck(cond, PETSC_COMM_SELF, PETSC_ERR_SUP, "Device context (id: %" PetscInt64_FMT ", name: %s, type: %s) can only handle %s host memory", PetscObjectCast(dctx)->id, PetscObjectCast(dctx)->name, dctx->device ? PetscDeviceTypes[dctx->device->type] : "unknown", descr);
184:   PetscFunctionReturn(PETSC_SUCCESS);
185: }

187: // A helper utility, since register is called from PetscDeviceRegisterMemory() and
188: // PetscDevicAllocate(). The latter also needs the generated id, so instead of making it search
189: // the map again we just return it here
190: static PetscErrorCode PetscDeviceRegisterMemory_Private(const void *PETSC_RESTRICT ptr, PetscMemType mtype, std::size_t size, PetscObjectId *PETSC_RESTRICT id = nullptr)
191: {
192:   auto      &map = memory_map.map;
193:   const auto it  = memory_map.search_for(ptr);

195:   PetscFunctionBegin;
196:   if (it == map.cend()) {
197:     // pointer was never registered with the map, insert it and bail
198:     const auto newid = PetscObjectNewId_Internal();

200:     if (PetscDefined(USE_DEBUG)) {
201:       const auto tmp = PointerAttributes(mtype, newid, size);

203:       for (const auto &entry : map) {
204:         auto &&attr = entry.second;

206:         // REVIEW ME: maybe this should just be handled...
207:         PetscCheck(!tmp.contains(ptr, entry.first), PETSC_COMM_SELF, PETSC_ERR_ORDER, "Trying to register pointer %p (memtype %s, size %zu) but it appears you have already registered a sub-region of it (pointer %p, memtype %s, size %zu). Must register the larger region first", ptr, PetscMemTypeToString(mtype), size,
208:                    entry.first, PetscMemTypeToString(attr.mtype), attr.size);
209:       }
210:     }
211:     // clang-format off
212:     if (id) *id = newid;
213:     PetscCallCXX(map.emplace(
214:       std::piecewise_construct,
215:       std::forward_as_tuple(const_cast<MemoryMap::map_type::key_type>(ptr)),
216:       std::forward_as_tuple(mtype, newid, size)
217:     ));
218:     // clang-format on
219:     PetscFunctionReturn(PETSC_SUCCESS);
220:   }
221:   if (PetscDefined(USE_DEBUG)) {
222:     const auto &old = it->second;

224:     PetscCheck(PointerAttributes(mtype, old.id, size) == old, PETSC_COMM_SELF, PETSC_ERR_LIB, "Pointer %p appears to have been previously allocated with memtype %s, size %zu and assigned id %" PetscInt64_FMT ", which does not match new values: (mtype %s, size %zu, id %" PetscInt64_FMT ")", it->first,
225:                PetscMemTypeToString(old.mtype), old.size, old.id, PetscMemTypeToString(mtype), size, old.id);
226:   }
227:   if (id) *id = it->second.id;
228:   PetscFunctionReturn(PETSC_SUCCESS);
229: }

231: /*@C
232:   PetscDeviceRegisterMemory - Register a pointer for use with device-aware memory system

234:   Not Collective

236:   Input Parameters:
237: + ptr   - The pointer to register
238: . mtype - The `PetscMemType` of the pointer
239: - size  - The size (in bytes) of the memory region

241:   Notes:
242:   `ptr` need not point to the beginning of the memory range, however the user should register
243:   the

245:   It's OK to re-register the same `ptr` repeatedly (subsequent registrations do nothing)
246:   however the given `mtype` and `size` must match the original registration.

248:   `size` may be 0 (in which case this routine does nothing).

250:   Level: intermediate

252: .seealso: `PetscDeviceMalloc()`, `PetscDeviceArrayCopy()`, `PetscDeviceFree()`,
253: `PetscDeviceArrayZero()`
254: @*/
255: PetscErrorCode PetscDeviceRegisterMemory(const void *PETSC_RESTRICT ptr, PetscMemType mtype, std::size_t size)
256: {
257:   PetscFunctionBegin;
258:   if (PetscMemTypeHost(mtype)) PetscAssertPointer(ptr, 1);
259:   if (PetscUnlikely(!size)) PetscFunctionReturn(PETSC_SUCCESS); // there is no point registering empty range
260:   PetscCall(PetscDeviceRegisterMemory_Private(ptr, mtype, size));
261:   PetscFunctionReturn(PETSC_SUCCESS);
262: }

264: /*
265:   PetscDeviceAllocate_Private - Allocate device-aware memory

267:   Not Collective, Asynchronous, Auto-dependency aware

269:   Input Parameters:
270: + dctx      - The `PetscDeviceContext` used to allocate the memory
271: . clear     - Whether or not the memory should be zeroed
272: . mtype     - The type of memory to allocate
273: . n         - The amount (in bytes) to allocate
274: - alignment - The alignment requirement (in bytes) of the allocated pointer

276:   Output Parameter:
277: . ptr - The pointer to store the result in

279:   Notes:
280:   The user should prefer `PetscDeviceMalloc()` over this routine as it automatically computes
281:   the size of the allocation and alignment based on the size of the datatype.

283:   If the user is unsure about `alignment` -- or unable to compute it -- passing
284:   `PETSC_MEMALIGN` will always work, though the user should beware that this may be quite
285:   wasteful for very small allocations.

287:   Memory allocated with this function must be freed with `PetscDeviceFree()` (or
288:   `PetscDeviceDeallocate_Private()`).

290:   If `n` is zero, then `ptr` is set to `PETSC_NULLPTR`.

292:   This routine falls back to using `PetscMalloc1()` or `PetscCalloc1()` (depending on the value
293:   of `clear`) if PETSc was not configured with device support. The user should note that
294:   `mtype` and `alignment` are ignored in this case, as these routines allocate only host memory
295:   aligned to `PETSC_MEMALIGN`.

297:   Note result stored `ptr` is immediately valid and the user may freely inspect or manipulate
298:   its value on function return, i.e.\:

300: .vb
301:   PetscInt *ptr;

303:   PetscDeviceAllocate_Private(dctx, PETSC_FALSE, PETSC_MEMTYPE_DEVICE, 20, alignof(PetscInt), (void**)&ptr);

305:   PetscInt *sub_ptr = ptr + 10; // OK, no need to synchronize

307:   ptr[0] = 10; // ERROR, directly accessing contents of ptr is undefined until synchronization
308: .ve

310:   DAG representation:
311: .vb
312:   time ->

314:   -> dctx - |= CALL =| -\- dctx -->
315:                          \- ptr ->
316: .ve

318:   Level: intermediate

320: .N ASYNC_API

322: .seealso: `PetscDeviceMalloc()`, `PetscDeviceFree()`, `PetscDeviceDeallocate_Private()`,
323: `PetscDeviceArrayCopy()`, `PetscDeviceArrayZero()`, `PetscMemType`
324: */
325: PetscErrorCode PetscDeviceAllocate_Private(PetscDeviceContext dctx, PetscBool clear, PetscMemType mtype, std::size_t n, std::size_t alignment, void **PETSC_RESTRICT ptr)
326: {
327:   PetscObjectId id = 0;

329:   PetscFunctionBegin;
330:   if (PetscDefined(USE_DEBUG)) {
331:     const auto is_power_of_2 = [](std::size_t num) { return (num & (num - 1)) == 0; };

333:     PetscCheck(alignment != 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Requested alignment %zu cannot be 0", alignment);
334:     PetscCheck(is_power_of_2(alignment), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Requested alignment %zu must be a power of 2", alignment);
335:   }
336:   PetscAssertPointer(ptr, 6);
337:   *ptr = nullptr;
338:   if (PetscUnlikely(!n)) PetscFunctionReturn(PETSC_SUCCESS);
339:   PetscCall(memory_map.register_finalize());
340:   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));

342:   // get our pointer here
343:   if (dctx->ops->memalloc) {
344:     PetscUseTypeMethod(dctx, memalloc, clear, mtype, n, alignment, ptr);
345:   } else {
346:     PetscCall(PetscDeviceCheckCapable_Private(dctx, PetscMemTypeHost(mtype), "allocating"));
347:     PetscCall(PetscMallocA(1, clear, __LINE__, PETSC_FUNCTION_NAME, __FILE__, n, ptr));
348:   }
349:   PetscCall(PetscDeviceRegisterMemory_Private(*ptr, mtype, n, &id));
350:   // Note this is a "write" so that the next dctx to try and read from the pointer has to wait
351:   // for the allocation to be ready
352:   PetscCall(PetscDeviceContextMarkIntentFromID(dctx, id, PETSC_MEMORY_ACCESS_WRITE, "memory allocation"));
353:   PetscFunctionReturn(PETSC_SUCCESS);
354: }

356: /*
357:   PetscDeviceDeallocate_Private - Free device-aware memory

359:   Not Collective, Asynchronous, Auto-dependency aware

361:   Input Parameters:
362: + dctx  - The `PetscDeviceContext` used to free the memory
363: - ptr   - The pointer to free

365:   Level: intermediate

367:   Notes:
368:   `ptr` must have been allocated using any of `PetscDeviceMalloc()`, `PetscDeviceCalloc()` or
369:   `PetscDeviceAllocate_Private()`, or registered with the system via `PetscDeviceRegisterMemory()`.

371:   The user should prefer `PetscDeviceFree()` over this routine as it automatically sets `ptr`
372:   to `PETSC_NULLPTR` on successful deallocation.

374:   `ptr` may be `NULL`.

376:   This routine falls back to using `PetscFree()` if PETSc was not configured with device
377:   support. The user should note that `PetscFree()` frees only host memory.

379:   DAG representation:
380: .vb
381:   time ->

383:   -> dctx -/- |= CALL =| - dctx ->
384:   -> ptr -/
385: .ve

387: .N ASYNC_API

389: .seealso: `PetscDeviceFree()`, `PetscDeviceAllocate_Private()`
390: */
391: PetscErrorCode PetscDeviceDeallocate_Private(PetscDeviceContext dctx, void *PETSC_RESTRICT ptr)
392: {
393:   PetscFunctionBegin;
394:   if (ptr) {
395:     auto      &map      = memory_map.map;
396:     const auto found_it = map.find(const_cast<MemoryMap::map_type::key_type>(ptr));

398:     if (PetscUnlikelyDebug(found_it == map.end())) {
399:       // OK this is a bad pointer, now determine why
400:       const auto it = memory_map.search_for(ptr);

402:       // if it is map.cend() then no allocation owns it, meaning it was not allocated by us!
403:       PetscCheck(it != map.cend(), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Pointer %p was not allocated via PetscDeviceAllocate_Private()", ptr);
404:       // if we are here then we did allocate it but the user has tried to do something along
405:       // the lines of:
406:       //
407:       // allocate(&ptr, size);
408:       // deallocate(ptr+5);
409:       //
410:       auto &&attr = it->second;
411:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Attempting to deallocate pointer %p which is a suballocation of %p (memtype %s, id %" PetscInt64_FMT ", size %zu bytes)", ptr, it->first, PetscMemTypeToString(attr.mtype), attr.id, attr.size);
412:     }
413:     auto &&attr = found_it->second;
414:     PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
415:     // mark intent BEFORE we free, note we mark as write so that we are made to wait on any
416:     // outstanding reads (don't want to kill the pointer before they are done)
417:     PetscCall(PetscDeviceContextMarkIntentFromID(dctx, attr.id, PETSC_MEMORY_ACCESS_WRITE, "memory deallocation"));
418:     // do free
419:     if (dctx->ops->memfree) {
420:       PetscUseTypeMethod(dctx, memfree, attr.mtype, (void **)&ptr);
421:     } else {
422:       PetscCall(PetscDeviceCheckCapable_Private(dctx, PetscMemTypeHost(attr.mtype), "freeing"));
423:     }
424:     // if ptr still exists, then the device context could not handle it
425:     if (ptr) PetscCall(PetscFree(ptr));
426:     PetscCallCXX(map.erase(found_it));
427:   }
428:   PetscFunctionReturn(PETSC_SUCCESS);
429: }

431: // PetscClangLinter pragma disable: -fdoc-section-header-unknown
432: /*@C
433:   PetscDeviceMemcpy - Copy memory in a device-aware manner

435:   Not Collective, Asynchronous, Auto-dependency aware

437:   Input Parameters:
438: + dctx - The `PetscDeviceContext` used to copy the memory
439: . dest - The pointer to copy to
440: . src  - The pointer to copy from
441: - n    - The amount (in bytes) to copy

443:   Level: intermediate

445:   Notes:
446:   Both `dest` and `src` must have been allocated by `PetscDeviceMalloc()` or
447:   `PetscDeviceCalloc()`.

449:   `src` and `dest` cannot overlap.

451:   If both `src` and `dest` are on the host this routine is fully synchronous.

453:   The user should prefer `PetscDeviceArrayCopy()` over this routine as it automatically
454:   computes the number of bytes to copy from the size of the pointer types.

456:   DAG representation:
457: .vb
458:   time ->

460:   -> dctx - |= CALL =| - dctx ->
461:   -> dest --------------------->
462:   -> src ---------------------->
463: .ve

465: .N ASYNC_API

467: .seealso: `PetscDeviceArrayCopy()`, `PetscDeviceMalloc()`, `PetscDeviceCalloc()`,
468: `PetscDeviceFree()`
469: @*/
470: PetscErrorCode PetscDeviceMemcpy(PetscDeviceContext dctx, void *PETSC_RESTRICT dest, const void *PETSC_RESTRICT src, std::size_t n)
471: {
472:   PetscFunctionBegin;
473:   if (!n) PetscFunctionReturn(PETSC_SUCCESS);
474:   PetscCheck(dest, PETSC_COMM_SELF, PETSC_ERR_POINTER, "Trying to copy to a NULL pointer");
475:   PetscCheck(src, PETSC_COMM_SELF, PETSC_ERR_POINTER, "Trying to copy from a NULL pointer");
476:   if (dest == src) PetscFunctionReturn(PETSC_SUCCESS);
477:   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
478:   {
479:     const auto &dest_attr = memory_map.search_for(dest, true)->second;
480:     const auto &src_attr  = memory_map.search_for(src, true)->second;
481:     const auto  mode      = PetscMemTypeToDeviceCopyMode(dest_attr.mtype, src_attr.mtype);

483:     PetscCall(PetscDeviceContextMarkIntentFromID(dctx, src_attr.id, PETSC_MEMORY_ACCESS_READ, "memory copy (src)"));
484:     PetscCall(PetscDeviceContextMarkIntentFromID(dctx, dest_attr.id, PETSC_MEMORY_ACCESS_WRITE, "memory copy (dest)"));
485:     // perform the copy
486:     if (dctx->ops->memcopy) {
487:       PetscUseTypeMethod(dctx, memcopy, dest, src, n, mode);
488:       if (mode == PETSC_DEVICE_COPY_HTOD) {
489:         PetscCall(PetscLogCpuToGpu(n));
490:       } else if (mode == PETSC_DEVICE_COPY_DTOH) {
491:         PetscCall(PetscLogGpuToCpu(n));
492:       }
493:     } else {
494:       // REVIEW ME: we might potentially need to sync here if the memory is device-allocated
495:       // (pinned) but being copied by a host dctx
496:       PetscCall(PetscDeviceCheckCapable_Private(dctx, mode == PETSC_DEVICE_COPY_HTOH, "copying"));
497:       PetscCall(PetscMemcpy(dest, src, n));
498:     }
499:   }
500:   PetscFunctionReturn(PETSC_SUCCESS);
501: }

503: // PetscClangLinter pragma disable: -fdoc-section-header-unknown
504: /*@C
505:   PetscDeviceMemset - Memset device-aware memory

507:   Not Collective, Asynchronous, Auto-dependency aware

509:   Input Parameters:
510: + dctx - The `PetscDeviceContext` used to memset the memory
511: . ptr  - The pointer to the memory
512: . v    - The value to set
513: - n    - The amount (in bytes) to set

515:   Level: intermediate

517:   Notes:
518:   `ptr` must have been allocated by `PetscDeviceMalloc()` or `PetscDeviceCalloc()`.

520:   The user should prefer `PetscDeviceArrayZero()` over this routine as it automatically
521:   computes the number of bytes to copy from the size of the pointer types, though they should
522:   note that it only zeros memory.

524:   This routine is analogous to `memset()`. That is, this routine copies the value
525:   `static_cast<unsigned char>(v)` into each of the first count characters of the object pointed
526:   to by `dest`.

528:   If `dest` is on device, this routine is asynchronous.

530:   DAG representation:
531: .vb
532:   time ->

534:   -> dctx - |= CALL =| - dctx ->
535:   -> dest --------------------->
536: .ve

538: .N ASYNC_API

540: .seealso: `PetscDeviceArrayZero()`, `PetscDeviceMalloc()`, `PetscDeviceCalloc()`,
541: `PetscDeviceFree()`
542: @*/
543: PetscErrorCode PetscDeviceMemset(PetscDeviceContext dctx, void *ptr, PetscInt v, std::size_t n)
544: {
545:   PetscFunctionBegin;
546:   if (PetscUnlikely(!n)) PetscFunctionReturn(PETSC_SUCCESS);
547:   PetscCheck(ptr, PETSC_COMM_SELF, PETSC_ERR_POINTER, "Trying to memset a NULL pointer");
548:   PetscCall(PetscDeviceContextGetOptionalNullContext_Internal(&dctx));
549:   {
550:     const auto &attr = memory_map.search_for(ptr, true)->second;

552:     PetscCall(PetscDeviceContextMarkIntentFromID(dctx, attr.id, PETSC_MEMORY_ACCESS_WRITE, "memory set"));
553:     if (dctx->ops->memset) {
554:       PetscUseTypeMethod(dctx, memset, attr.mtype, ptr, v, n);
555:     } else {
556:       // REVIEW ME: we might potentially need to sync here if the memory is device-allocated
557:       // (pinned) but being memset by a host dctx
558:       PetscCall(PetscDeviceCheckCapable_Private(dctx, PetscMemTypeHost(attr.mtype), "memsetting"));
559:       std::memset(ptr, static_cast<int>(v), n);
560:     }
561:   }
562:   PetscFunctionReturn(PETSC_SUCCESS);
563: }