-- [web-page residue, not part of the module] Code fetch complete; the page will refresh automatically.
require "torch"
paths.require("libcutorch")
-- Give every CUDA storage/tensor class the __tostring__ of a CPU class.
-- Each entry pairs a CUDA class-name prefix with the host prefix it borrows
-- from; the un-suffixed Cuda* classes borrow from the Float* classes.
do
   local function borrowTostring(cudaPrefix, hostPrefix)
      torch[cudaPrefix .. 'Storage'].__tostring__ = torch[hostPrefix .. 'Storage'].__tostring__
      torch[cudaPrefix .. 'Tensor'].__tostring__ = torch[hostPrefix .. 'Tensor'].__tostring__
   end

   local prefixPairs = {
      {'CudaByte', 'Byte'},
      {'CudaChar', 'Char'},
      {'CudaShort', 'Short'},
      {'CudaInt', 'Int'},
      {'CudaLong', 'Long'},
      {'Cuda', 'Float'},
      {'CudaDouble', 'Double'},
   }
   for i = 1, #prefixPairs do
      borrowTostring(prefixPairs[i][1], prefixPairs[i][2])
   end

   -- The Half classes are only wired up when cutorch.hasHalf is set.
   if cutorch.hasHalf then
      borrowTostring('CudaHalf', 'Half')
   end
end
require('cutorch.Tensor')
require('cutorch.FFI')
require('cutorch.test')
local unpack = unpack or table.unpack
-- Restores the previously active device, then either forwards the closure's
-- results or re-raises its error.
-- pcall's results are received as varargs instead of being packed into a
-- table: the length of a table containing nil holes is not well defined, so
-- unpacking such a table could silently drop nil return values.
local function restoreDeviceAndForward(prevDeviceID, ok, ...)
   cutorch.setDevice(prevDeviceID)
   if ok then
      return ...
   end
   -- On failure pcall yields exactly one extra value: the error object.
   error((...))
end

-- Runs `closure` with `newDeviceID` selected as the current device and
-- switches back to the previously selected device afterwards, whether the
-- closure returned normally or raised.
-- @param newDeviceID device id handed to cutorch.setDevice for the call
-- @param closure     function invoked with no arguments
-- @return every value returned by `closure`, including nils
-- Raises whatever error `closure` raised, after the device was restored.
function cutorch.withDevice(newDeviceID, closure)
   local curDeviceID = cutorch.getDevice()
   cutorch.setDevice(newDeviceID)
   return restoreDeviceAndForward(curDeviceID, pcall(closure))
end
-- Normalizes a size specification into a torch.LongTensor.
--   * first argument missing or falsy -> torch.LongTensor{0}
--   * torch.isStorage(...) is true     -> torch.LongTensor(...) from it
--   * anything else                    -> torch.LongTensor{...} of the args
local function longTensorSize(...)
   local first = ...
   if not first then
      return torch.LongTensor{0}
   end
   if torch.isStorage(...) then
      return torch.LongTensor(...)
   end
   return torch.LongTensor{...}
end
-- Element types for which a cutorch.createCudaHost<Type>Tensor constructor
-- is registered; Half is added only when cutorch.hasHalf is set.
local hostTypes = {'Float', 'Double', 'Int', 'Long', 'Byte'}
if cutorch.hasHalf then
   hostTypes[#hostTypes + 1] = 'Half'
end

-- Returns the constructor for one element type. The constructor accepts
-- either a LongStorage or a sequence of numbers as the size, allocates a
-- torch <ty>Storage of size:prod() elements through cutorch.CudaHostAllocator
-- and returns a torch <ty>Tensor over it with the requested sizes.
local function makeCudaHostTensorFactory(ty)
   local storageName, tensorName = ty .. 'Storage', ty .. 'Tensor'
   return function(...)
      local dims = longTensorSize(...)
      local hostStorage = torch[storageName](cutorch.CudaHostAllocator, dims:prod())
      return torch[tensorName](hostStorage, 1, dims:storage())
   end
end

for i = 1, #hostTypes do
   local ty = hostTypes[i]
   cutorch['createCudaHost' .. ty .. 'Tensor'] = makeCudaHostTensorFactory(ty)
end

-- Untyped alias: the default host tensor constructor is the Float one.
cutorch.createCudaHostTensor = cutorch.createCudaHostFloatTensor
-- Creates a torch.FloatTensor whose storage comes from the CudaUVAAllocator.
-- The size may be given as a LongStorage or as a sequence of numbers; the
-- storage holds size:prod() elements and the tensor is built from that
-- storage alone.
local function _createUVATensor(...)
   local dims = longTensorSize(...)

   -- Why the GPU is synchronized before allocating (quotes are from
   -- CUDA_C_Programming_guide.pdf):
   --
   -- Section J:
   -- "It is worth a comment on the synchronization between host and device.
   -- Notice how in the non-managed example, the synchronous cudaMemcpy()
   -- routine is used both to synchronize the kernel (that is, to wait for it
   -- to finish running), and to transfer the data to the host. The Unified
   -- Memory examples do not call cudaMemcpy() and so require an explicit
   -- cudaDeviceSynchronize() before the host program can safely use the
   -- output from the GPU."
   --
   -- Section J.2.2.1:
   -- "Note that if memory is dynamically allocated with cudaMallocManaged()
   -- or cuMemAllocManaged() while the GPU is active, the behavior of the
   -- memory is unspecified until additional work is launched or the GPU is
   -- synchronized. Attempting to access the memory on the CPU during this
   -- time may or may not cause a segmentation fault."
   cutorch.synchronize()

   local uvaStorage = torch.FloatStorage(cutorch.CudaUVAAllocator, dims:prod())
   return torch.FloatTensor(uvaStorage)
end
-- Public entry point for creating a FloatTensor through _createUVATensor.
-- All arguments are forwarded unchanged and its result is returned as is.
cutorch.createFloatUVATensor = function(...)
   return _createUVATensor(...)
end
-- Creates a CudaTensor over memory obtained through the CudaUVAAllocator.
-- Accepts either a LongStorage or a sequence of numbers.
-- Allocation and synchronization are delegated to _createUVATensor, which
-- yields a Float tensor; its storage's size and raw data pointer are then
-- used to construct the CudaStorage behind the returned tensor.
-- NOTE(review): the Float tensor is not retained past this call; presumably
-- it owns the allocation -- confirm the memory outlives it.
function cutorch.createCudaUVATensor(...)
   local floatTensor = _createUVATensor(...)
   local floatStorage = floatTensor:storage()
   local numel = floatStorage:size()
   local address = tonumber(torch.data(floatStorage, true))
   return torch.CudaTensor(torch.CudaStorage(numel, address))
end
-- UVA storage is one memory location backed by virtual addressing, so the
-- CPU <-> GPU tensor conversions here pass the raw data pointer instead of
-- copying. Only FloatTensor and CudaTensor are supported for now, i.e. the
-- Cuda -> float and float -> Cuda directions.

-- Returns a FloatTensor over the storage of `t`.
-- Raises if `t` is not a tensor; asserts that the resulting storage is
-- reported as managed by cutorch.isManaged.
function cutorch.toFloatUVATensor(t)
   if not torch.isTensor(t) then
      error('Must use a tensor, got ' .. torch.type(t))
   end
   local source = t:storage()
   local floatStorage = torch.FloatStorage(
      source:size(),
      tonumber(torch.data(source, true))
   )
   assert(cutorch.isManaged(floatStorage))
   return torch.FloatTensor(floatStorage)
end
-- Returns a CudaTensor over the storage of `t`, built from that storage's
-- size and raw data pointer rather than from a copy.
-- Raises if `t` is not a tensor; asserts that the resulting storage is
-- reported as managed by cutorch.isManaged.
function cutorch.toCudaUVATensor(t)
   if not torch.isTensor(t) then
      error('Must use a tensor, got ' .. torch.type(t))
   end
   local source = t:storage()
   local cudaStorage = torch.CudaStorage(
      source:size(),
      tonumber(torch.data(source, true))
   )
   assert(cutorch.isManaged(cudaStorage))
   return torch.CudaTensor(cudaStorage)
end
-- Reports whether the data pointer of a tensor or storage is managed,
-- as answered by cutorch.isManagedPtr.
-- @param t a Tensor or a Storage; any other value raises a usage error
-- @return the result of cutorch.isManagedPtr for t's raw data pointer
function cutorch.isManaged(t)
   if not (torch.isTensor(t) or torch.isStorage(t)) then
      error('Usage: cutorch.isManaged(Tensor|Storage), got ' .. torch.type(t))
   end
   local address = tonumber(torch.data(t, true))
   return cutorch.isManagedPtr(address)
end
-- Turn on cutorch's automatic heap tracking (used for garbage collection)
-- when this module is loaded.
-- Remove this line to disable it.
cutorch.setHeapTracking(true)
-- Builds, or refreshes in place, the two-slot state table that
-- torch.multinomialAlias consumes.
-- @param probs forwarded as the first argument of torch.multinomialAliasSetup_
-- @param state optional table; when given, its slots 1 and 2 are passed to
--              torch.multinomialAliasSetup_ and overwritten with the results
-- @return `state` itself when a table was passed, otherwise a new table
--         holding the two results
function torch.multinomialAliasSetup(probs, state)
   if torch.type(state) ~= 'table' then
      -- No reusable state: call with probs only and start a new table.
      local fresh = {}
      fresh[1], fresh[2] = torch.multinomialAliasSetup_(probs)
      return fresh
   end
   local first, second = torch.multinomialAliasSetup_(probs, state[1], state[2])
   state[1], state[2] = first, second
   return state
end
-- Calls torch.CudaTensor.multinomialAlias_ with `output` and the two slots
-- of `state` (as produced by torch.multinomialAliasSetup).
-- @param output passed to multinomialAlias_ and returned to the caller
-- @param state  table whose slots 1 and 2 are forwarded
-- @return `output`
function torch.multinomialAlias(output, state)
   local first, second = state[1], state[2]
   torch.CudaTensor.multinomialAlias_(output, first, second)
   return output
end
return cutorch
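-- [web-page residue, not part of the module] This location may contain content that is unsuitable for display, so the page does not show it. You can review and modify it yourself using the relevant editing functions.
-- [web-page residue, not part of the module] If you confirm that the content does not involve inappropriate language / pure advertising / violence / vulgar or pornographic material / infringement / piracy / false or worthless content, or content that violates national laws and regulations, you may click submit to appeal and it will be handled as soon as possible.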