Opened 6 months ago

Last modified 6 months ago

#10654 new defect

FFmpeg crashes on vulkan to cuda frame transfer

Reported by: Cyryl Ł Owned by:
Priority: normal Component: avutil
Version: git-master Keywords: vulkan cuda
Cc: Blocked By:
Blocking: Reproduced by developer: no
Analyzed by developer: no

Description

FFmpeg built from current git master on Ubuntu 23.10.

How to reproduce:

% ffmpeg -y -hwaccel vulkan -hwaccel_output_format vulkan -i in.mp4 -vf 'hwupload_cuda' -c:v h264_nvenc -crf 21 -c:a copy out.mp4

Output:

ffmpeg version git-2023-11-11-fa81de4 Copyright (c) 2000-2023 the FFmpeg developers                                                                                                               
  built with gcc 13 (Ubuntu 13.2.0-4ubuntu3)
  configuration: --disable-doc --enable-libx264 --enable-libx265 --enable-gpl --enable-libmp3lame --enable-libfdk-aac --enable-cuda-nvcc --enable-nvdec --enable-nvenc --enable-cuvid --enable-nonfree --enable-libnpp --enable-libglslang --disable-optimizations --disable-stripping --enable-debug
  libavutil      58. 32.100 / 58. 32.100
  libavcodec     60. 33.100 / 60. 33.100
  libavformat    60. 17.100 / 60. 17.100
  libavdevice    60.  4.100 / 60.  4.100
  libavfilter     9. 13.100 /  9. 13.100
  libswscale      7.  6.100 /  7.  6.100
  libswresample   4. 13.100 /  4. 13.100
  libpostproc    57.  4.100 / 57.  4.100
Input #0, mov,mp4,m4a,3gp,3g2,mj2, from 'in.mp4':
  Metadata:
    major_brand     : isom
    minor_version   : 512
    compatible_brands: isomiso2avc1mp41
    encoder         : Lavf60.3.100
  Duration: 00:22:42.45, start: 0.000000, bitrate: 2009 kb/s
  Stream #0:0[0x1](und): Video: h264 (Main) (avc1 / 0x31637661), yuv420p(tv, bt709, progressive), 1920x1080 [SAR 1:1 DAR 16:9], 1874 kb/s, 25 fps, 25 tbr, 12800 tbn (default)
    Metadata:
      handler_name    : ISO Media file produced by Google Inc. Created on: 10/29/2023.
      vendor_id       : [0][0][0][0]
      encoder         : Lavc60.3.100 h264_nvenc
  Stream #0:1[0x2](eng): Audio: aac (LC) (mp4a / 0x6134706D), 44100 Hz, stereo, fltp, 128 kb/s (default)
    Metadata:
      handler_name    : ISO Media file produced by Google Inc.
      vendor_id       : [0][0][0][0]
Stream mapping:
  Stream #0:0 -> #0:0 (h264 (native) -> h264 (h264_nvenc))
  Stream #0:1 -> #0:1 (copy)
Press [q] to stop, [?] for help
Segmentation fault (core dumped)

GDB info:

0x00007fa29c2281a9 in ?? () from /lib64/libnvidia-eglcore.so.535.129.03
(gdb) bt
#0  0x00007fa29c2281a9 in ?? () from /lib64/libnvidia-eglcore.so.535.129.03
#1  0x00005616b9c2ddcc in vulkan_export_to_cuda (hwfc=0x7fa27c05c700, cuda_hwfc=0x5616c0661a40, frame=0x5616c06615c0) at libavutil/hwcontext_vulkan.c:2922
#2  0x00005616b9c2fb5e in vulkan_transfer_data_to_cuda (hwfc=0x7fa27c05c700, dst=0x5616c0661800, src=0x5616c06615c0) at libavutil/hwcontext_vulkan.c:3601
#3  0x00005616b9c30115 in vulkan_transfer_data_from (hwfc=0x7fa27c05c700, dst=0x5616c0661800, src=0x5616c06615c0) at libavutil/hwcontext_vulkan.c:3678
#4  0x00005616b9c1fab1 in av_hwframe_transfer_data (dst=0x5616c0661800, src=0x5616c06615c0, flags=0) at libavutil/hwcontext.c:482
#5  0x00005616b8729caf in cudaupload_filter_frame (link=0x5616c0660400, in=0x5616c06615c0) at libavfilter/vf_hwupload_cuda.c:140
#6  0x00005616b85af33c in ff_filter_frame_framed (link=0x5616c0660400, frame=0x5616c06615c0) at libavfilter/avfilter.c:969
#7  0x00005616b85afa5e in ff_filter_frame_to_filter (link=0x5616c0660400) at libavfilter/avfilter.c:1123
#8  0x00005616b85afc7c in ff_filter_activate_default (filter=0x5616bd0beac0) at libavfilter/avfilter.c:1172
#9  0x00005616b85afde0 in ff_filter_activate (filter=0x5616bd0beac0) at libavfilter/avfilter.c:1331
#10 0x00005616b85b4dd7 in ff_filter_graph_run_once (graph=0x5616bd0bd8c0) at libavfilter/avfiltergraph.c:1353
#11 0x00005616b85b63ae in push_frame (graph=0x5616bd0bd8c0) at libavfilter/buffersrc.c:167
#12 0x00005616b85b6a0b in av_buffersrc_add_frame_flags (ctx=0x5616bff44fc0, frame=0x5616bcee83c0, flags=4) at libavfilter/buffersrc.c:271
#13 0x00005616b8542f21 in ifilter_send_frame (ifilter=0x5616bd958040, frame=0x5616bcee83c0, keep_reference=0) at fftools/ffmpeg_filter.c:2448
#14 0x00005616b852bccc in send_frame_to_filters (ist=0x5616bcee7400, decoded_frame=0x5616bcee8180) at fftools/ffmpeg_dec.c:153
#15 0x00005616b852dea9 in dec_packet (ist=0x5616bcee7400, pkt=0x7fa278007fc0, no_eof=0) at fftools/ffmpeg_dec.c:813
#16 0x00005616b8569741 in process_input_packet (ist=0x5616bcee7400, pkt=0x7fa278007fc0, no_eof=0) at fftools/ffmpeg.c:811
#17 0x00005616b856a77d in process_input (file_index=0) at fftools/ffmpeg.c:1115
#18 0x00005616b856a885 in transcode_step (ost=0x5616bd0b22c0) at fftools/ffmpeg.c:1142
#19 0x00005616b856aa89 in transcode (err_rate_exceeded=0x7ffec32d2c30) at fftools/ffmpeg.c:1204
#20 0x00005616b856b01d in main (argc=17, argv=0x7ffec32d2d88) at fftools/ffmpeg.c:1330
(gdb) disass $pc-32,$pc+32
Dump of assembler code from 0x7fa29c228189 to 0x7fa29c2281c9:
   0x00007fa29c228189:  std
   0x00007fa29c22818a:  (bad)
   0x00007fa29c22818b:  (bad)
   0x00007fa29c22818c:  incl   0x200f9(%rcx)
   0x00007fa29c228192:  add    %dh,-0x3d(%rbx,%rcx,1)
   0x00007fa29c228196:  cs nopw 0x0(%rax,%rax,1)
   0x00007fa29c2281a0:  mov    $0x1,%ecx
   0x00007fa29c2281a5:  sub    $0x8,%rsp
=> 0x00007fa29c2281a9:  mov    0xf0(%rdi),%rsi
   0x00007fa29c2281b0:  mov    0xe8(%rdi),%rdi
   0x00007fa29c2281b7:  call   0x7fa29c226300
   0x00007fa29c2281bc:  cmp    $0x1,%eax
   0x00007fa29c2281bf:  sbb    %eax,%eax
   0x00007fa29c2281c1:  add    $0x8,%rsp
   0x00007fa29c2281c5:  not    %eax
   0x00007fa29c2281c7:  and    $0xfffffffd,%eax
End of assembler dump.
(gdb) info all-registers
rax            0x0                 0
rbx            0x7ffec32d2d88      140732172938632
rcx            0x0                 0
rdx            0x7ffec32d1a18      140732172933656
rsi            0x7ffec32d1970      140732172933488
rdi            0x0                 0
rbp            0x7ffec32d1b00      0x7ffec32d1b00
rsp            0x7ffec32d18c0      0x7ffec32d18c0
r8             0x7fa29c1585b0      140336380085680
r9             0x0                 0
r10            0x0                 0
r11            0x0                 0
r12            0x0                 0
r13            0x7ffec32d2e18      140732172938776
r14            0x5616ba9092d8      94655619306200
r15            0x7fa2d24e3000      140337289768960
rip            0x7fa29c2281a9      0x7fa29c2281a9
eflags         0x10206             [ PF IF RF ]
cs             0x33                51
ss             0x2b                43
ds             0x0                 0
es             0x0                 0
fs             0x0                 0
gs             0x0                 0
st0            <invalid float value> (raw 0xffff0000000000000000)
st1            <invalid float value> (raw 0xffff0000000000000000)
st2            <invalid float value> (raw 0xffff0000000000000000)
--Type <RET> for more, q to quit, c to continue without paging--
st3            <invalid float value> (raw 0xffff0000000000000000)
st4            <invalid float value> (raw 0xffff0000000000000000)
st5            <invalid float value> (raw 0xffff0606060606060606)
st6            <invalid float value> (raw 0xffff0303030303030303)
st7            <invalid float value> (raw 0xffff0101010101010101)
fctrl          0x37f               895
fstat          0x0                 0
ftag           0xffff              65535
fiseg          0x0                 0
fioff          0x0                 0
foseg          0x0                 0
fooff          0x0                 0
fop            0x0                 0
mxcsr          0x1fa8              [ OE PE IM DM ZM OM UM PM ]
fs_base        0x7fa2cf9d9000      140337244639232
gs_base        0x0                 0
ymm0           {v16_bfloat16 = {0x0 <repeats 16 times>}, v16_half = {0x0 <repeats 16 times>}, v8_float = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_double = {0x0, 0x0, 0x0, 0x0}, v32_int8 = {0x0 <repeats 32 times>}, v16_int16 = {0x0 <repeats 16 times>}, v8_int32 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int64 = {0x0, 0x0, 0x0, 0x0}, v2_int128 = {0x0, 0x0}}
ymm1           {v16_bfloat16 = {0x3ee0, 0xc067, 0x5616, 0x0 <repeats 13 times>}, v16_half = {0x3ee0, 0xc067, 0x5616, 0x0 <repeats 13 times>}, v8_float = {0xc0673ee0, 0x5616, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_double = {0x5616c0673ee0, 0x0, 0x0, 0x0}, v32_int8 = {0xe0, 0x3e, 0x67, 0xc0, 0x16, 0x56, 0x0 <repeats 26 times>}, v16_int16 = {0x3ee0, 0xc067, 0x5616, 0x0 <repeats 13 times>}, v8_int32 = {0xc0673ee0, 0x5616, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int64 = {0x5616c0673ee0, 0x0, 0x0, 0x0}, v2_int128 = {0x5616c0673ee0, 0x0}}
ymm2           {v16_bfloat16 = {0x4ab0, 0xc067, 0x5616, 0x0, 0x4ab0, 0xc067, 0x5616, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v16_half = {0x4ab0, 0xc067, 0x5616, 0x0, 0x4ab0, 0xc067, 0x5616, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v8_float = {0xc0674ab0, 0x5616, 0xc0674ab0, 0x5616, 0x0, 0x0, 0x0, 0x0}, v4_double = {0x5616c0674ab0, 0x5616c0674ab0, 0x0, 0x0}, v32_int8 = {0xb0, 0x4a, 0x67, 0xc0, 0x16, 0x56, 0x0, 0x0, 0xb0, 0x4a, 0x67, 0xc0, 0x16, 0x56, 0x0 <repeats 18 times>}, v16_int16 = {0x4ab0, 0xc067, 0x5616, 0x0, 0x4ab0, 0xc067, 0x5616, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v8_int32 = {0xc0674ab0, 0x5616, 0xc0674ab0, 0x5616, 0x0, 0x0, 0x0, 0x0}, v4_int64 = {0x5616c0674ab0, 0x5616c0674ab0, 0x0, 0x0}, v2_int128 = {0x5616c0674ab000005616c0674ab0, 0x0}}
ymm3           {v16_bfloat16 = {0xf089, 0xff71, 0x4656, 0xdafe, 0xf1a9, 0xb54, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v16_half = {0xf089, 0xff71, 0x4656, 0xdafe, 0xf1a9, 0xb54, 0x1, --Type <RET> for more, q to quit, c to continue without paging--
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v8_float = {0xff71f089, 0xdafe4656, 0xb54f1a9, 0x1, 0x0, 0x0, 0x0, 0x0}, v4_double = {0xdafe4656ff71f089, 0x10b54f1a9, 0x0, 0x0}, v32_int8 = {0x89, 0xf0, 0x71, 0xff, 0x56, 0x46, 0xfe, 0xda, 0xa9, 0xf1, 0x54, 0xb, 0x1, 0x0 <repeats 19 times>}, v16_int16 = {0xf089, 0xff71, 0x4656, 0xdafe, 0xf1a9, 0xb54, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v8_int32 = {0xff71f089, 0xdafe4656, 0xb54f1a9, 0x1, 0x0, 0x0, 0x0, 0x0}, v4_int64 = {0xdafe4656ff71f089, 0x10b54f1a9, 0x0, 0x0}, v2_int128 = {0x10b54f1a9dafe4656ff71f089, 0x0}}
ymm4           {v16_bfloat16 = {0x1, 0x0 <repeats 15 times>}, v16_half = {0x1, 0x0 <repeats 15 times>}, v8_float = {0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_double = {0x1, 0x0, 0x0, 0x0}, v32_int8 = {0x1, 0x0 <repeats 31 times>}, v16_int16 = {0x1, 0x0 <repeats 15 times>}, v8_int32 = {0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int64 = {0x1, 0x0, 0x0, 0x0}, v2_int128 = {0x1, 0x0}}
ymm5           {v16_bfloat16 = {0xb94e, 0x6bd4, 0xf089, 0xff71, 0x4656, 0xdafe, 0xf1a9, 0xb54, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v16_half = {0xb94e, 0x6bd4, 0xf089, 0xff71, 0x4656, 0xdafe, 0xf1a9, 0xb54, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v8_float = {0x6bd4b94e, 0xff71f089, 0xdafe4656, 0xb54f1a9, 0x0, 0x0, 0x0, 0x0}, v4_double = {0xff71f0896bd4b94e, 0xb54f1a9dafe4656, 0x0, 0x0}, v32_int8 = {0x4e, 0xb9, 0xd4, 0x6b, 0x89, 0xf0, 0x71, 0xff, 0x56, 0x46, 0xfe, 0xda, 0xa9, 0xf1, 0x54, 0xb, 0x0 <repeats 16 times>}, v16_int16 = {0xb94e, 0x6bd4, 0xf089, 0xff71, 0x4656, 0xdafe, 0xf1a9, 0xb54, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v8_int32 = {0x6bd4b94e, 0xff71f089, 0xdafe4656, 0xb54f1a9, 0x0, 0x0, 0x0, 0x0}, v4_int64 = {0xff71f0896bd4b94e, 0xb54f1a9dafe4656, 0x0, 0x0}, v2_int128 = {0xb54f1a9dafe4656ff71f0896bd4b94e, 0x0}}
ymm6           {v16_bfloat16 = {0x1, 0x0 <repeats 15 times>}, v16_half = {0x1, 0x0 <repeats 15 times>}, v8_float = {0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_double = {0x1, 0x0, 0x0, 0x0}, v32_int8 = {0x1, 0x0 <repeats 31 times>}, v16_int16 = {0x1, 0x0 <repeats 15 times>}, v8_int32 = {0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int64 = {0x1, 0x0, 0x0, 0x0}, v2_int128 = {0x1, 0x0}}
ymm7           {v16_bfloat16 = {0x0 <repeats 16 times>}, v16_half = {0x0 <repeats 16 times>}, v8_float = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_double = {0x0, 0x0, 0x0, 0x0}, v32_int8 = {0x0 <repeats 32 times>}, v16_int16 = {0x0 <repeats 16 times>}, v8_int32 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int64 = {0x0, 0x0, 0x0, 0x0}, v2_int128 = {0x0, 0x0}}
ymm8           {v16_bfloat16 = {0x7c12, 0x2ff, 0x4, 0x0, 0xcff, 0xf80, 0xd800, 0xf, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v16_half = {0x7c12, 0x2ff, 0x4, 0x0, 0xcff, 0xf80, 0xd800, 0xf, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v8_float = {0x2ff7c12, 0x4, 0xf800cff, 0xfd800, 0x0, 0x0, 0x0, 0x0}, v4_double = {0x402ff7c12, 0xfd8000f800cff, 0x0, 0x0}, v32_int8 = {0x12, 0x7c, 0xff, 0x2, 0x4, 0x0, 0x0, 0x0, 0xff, 0xc, 0x80, 0xf, 0x0, 0xd8, 0xf, 0x0 <repeats 17 times>}, v16_int16 = {0x7c12, 0x2ff, 0x4, 0x0, 0xcff, 0xf80, 0xd800, 0xf, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v8_int32 = {0x2ff7c12, 0x4, 0xf800cff, 0xfd800, 0x0, 0x0, 0x0, 0x0}, v4_int64 = {0x402ff7c12, 0xfd8000f800cff, 0x0, 0x0}, v2_int128 = {0xfd8000f800cff0000000402ff7c12, 0x0}}
ymm9           {v16_bfloat16 = {0xcccd, 0x3e4c, 0x0 <repeats 14 times>}, v16_half = {0xcccd, 0x3e4c, 0x0 <repeats 14 times>}, v8_float = {0x3e4ccccd, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_double = {0x3e4ccccd, 0x0, 0x0, 0x0}, v32_int8 = {0xcd, 0xcc, 0x4c, 0x3e, 0x0 <repeats 28 times>}, v16_int16 = {0xcccd, 0x3e4c, 0x0 <repeats 14 times>}, v8_int32 = {0x3e4ccccd, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int64 = {0x3e4ccccd, 0x0, 0x0, 0x0}, v2_int128 = {0x3e4ccccd, 0x0}}
ymm10          {v16_bfloat16 = {0xcccd, 0x3e4c, 0x0 <repeats 14 times>}, v16_half = {0xcccd, 0x3e4c, 0x0 <repeats 14 times>}, v8_float = {0x3e4ccccd, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_double = {0x3e4ccccd, 0x0, 0x0, 0x0}, v32_int8 = {0xcd, 0xcc, 0x4c, 0x3e, 0x0 <repeats 28 times>}, v16_int16 = {0xcccd, 0x3e4c, 0x0 <repeats 14 times>}, v8_int32 = {0x3e4ccccd, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int64 = {0x3e4ccccd, 0x0, 0x0, 0x0}, v2_int128 = {0x3e4ccccd, 0x0}}
ymm11          {v16_bfloat16 = {0x9999, 0x9999, 0x9999, 0x9999, 0x9999, 0x9999, 0x9999, 0x9999, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v16_half = {0x9999, 0x9999, 0x9999, 0x9999, 0x9999, 0x9999, 0x9999, 0x9999, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v8_float = {0x99999999, 0x99999999, 0x99999999, 0x99999999, 0x0, 0x0, 0x0, 0x0}, v4_double = {0x9999999999999999, 0x9999999999999999, 0x0, 0x0}, v32_int8 = {0x99 <repeats 16 times>, 0x0 <repeats 16 times>}, v16_int16 = {0x9999, 0x9999, 0x9999, 0x9999, 0x9999, 0x9999, 0x9999, 0x9999, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v8_--Type <RET> for more, q to quit, c to continue without paging--
int32 = {0x99999999, 0x99999999, 0x99999999, 0x99999999, 0x0, 0x0, 0x0, 0x0}, v4_int64 = {0x9999999999999999, 0x9999999999999999, 0x0, 0x0}, v2_int128 = {0x99999999999999999999999999999999, 0x0}}
ymm12          {v16_bfloat16 = {0x2020, 0x2020, 0x2020, 0x2020, 0x2020, 0x2020, 0x2020, 0x2020, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v16_half = {0x2020, 0x2020, 0x2020, 0x2020, 0x2020, 0x2020, 0x2020, 0x2020, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v8_float = {0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x0, 0x0, 0x0, 0x0}, v4_double = {0x2020202020202020, 0x2020202020202020, 0x0, 0x0}, v32_int8 = {0x20 <repeats 16 times>, 0x0 <repeats 16 times>}, v16_int16 = {0x2020, 0x2020, 0x2020, 0x2020, 0x2020, 0x2020, 0x2020, 0x2020, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v8_int32 = {0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x0, 0x0, 0x0, 0x0}, v4_int64 = {0x2020202020202020, 0x2020202020202020, 0x0, 0x0}, v2_int128 = {0x20202020202020202020202020202020, 0x0}}
ymm13          {v16_bfloat16 = {0x0, 0x0, 0x0, 0x8000, 0x0, 0x0, 0x0, 0x8000, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v16_half = {0x0, 0x0, 0x0, 0x8000, 0x0, 0x0, 0x0, 0x8000, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v8_float = {0x0, 0x80000000, 0x0, 0x80000000, 0x0, 0x0, 0x0, 0x0}, v4_double = {0x8000000000000000, 0x8000000000000000, 0x0, 0x0}, v32_int8 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x80, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x80, 0x0 <repeats 16 times>}, v16_int16 = {0x0, 0x0, 0x0, 0x8000, 0x0, 0x0, 0x0, 0x8000, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v8_int32 = {0x0, 0x80000000, 0x0, 0x80000000, 0x0, 0x0, 0x0, 0x0}, v4_int64 = {0x8000000000000000, 0x8000000000000000, 0x0, 0x0}, v2_int128 = {0x80000000000000008000000000000000, 0x0}}
ymm14          {v16_bfloat16 = {0x0 <repeats 16 times>}, v16_half = {0x0 <repeats 16 times>}, v8_float = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_double = {0x0, 0x0, 0x0, 0x0}, v32_int8 = {0x0 <repeats 32 times>}, v16_int16 = {0x0 <repeats 16 times>}, v8_int32 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int64 = {0x0, 0x0, 0x0, 0x0}, v2_int128 = {0x0, 0x0}}
ymm15          {v16_bfloat16 = {0x0 <repeats 16 times>}, v16_half = {0x0 <repeats 16 times>}, v8_float = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_double = {0x0, 0x0, 0x0, 0x0}, v32_int8 = {0x0 <repeats 32 times>}, v16_int16 = {0x0 <repeats 16 times>}, v8_int32 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, v4_int64 = {0x0, 0x0, 0x0, 0x0}, v2_int128 = {0x0, 0x0}}
(gdb) 

Change History (1)

comment:1 by Cyryl Ł, 6 months ago

Keywords: vulkan cuda added
Note: See TracTickets for help on using tickets.