mirror of
https://github.com/libretro/RetroArch.git
synced 2025-04-02 10:51:52 -04:00
Don't use LUT for YUV conversion.
Current C version is just as fast, and doesn't thrash the cache. ~100us per 640x480 webcam frame.
This commit is contained in:
parent
9d60b53630
commit
c4176564c5
5 changed files with 96 additions and 136 deletions
|
@ -19,9 +19,11 @@
|
|||
#include <malloc.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <stddef.h>
|
||||
#include "../driver.h"
|
||||
#include "../performance.h"
|
||||
#include "../miscellaneous.h"
|
||||
#include "../gfx/scaler/scaler.h"
|
||||
#include <stdlib.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
|
@ -49,102 +51,21 @@ typedef struct video4linux
|
|||
unsigned n_buffers;
|
||||
size_t width;
|
||||
size_t height;
|
||||
size_t pitch;
|
||||
|
||||
uint32_t *YCbCr_to_RGB;
|
||||
struct scaler_ctx scaler;
|
||||
uint32_t *buffer_output;
|
||||
bool ready;
|
||||
|
||||
char dev_name[PATH_MAX];
|
||||
} video4linux_t;
|
||||
|
||||
// FIXME: Shouldn't use LUTs for this.
|
||||
// The LUT is simply too big, and the conversion can be done efficiently with fixed-point SIMD anyways.
|
||||
/*
|
||||
* YCbCr to RGB lookup table
|
||||
* Y, Cb, Cr range is 0-255
|
||||
*
|
||||
* Stored value bits:
|
||||
* 24-16 Red
|
||||
* 15-8 Green
|
||||
* 7-0 Blue
|
||||
*/
|
||||
#define YUV_SHIFT(y, cb, cr) ((y << 16) | (cb << 8) | (cr << 0))
|
||||
#define RGB_SHIFT(r, g, b) ((r << 16) | (g << 8) | (b << 0))
|
||||
|
||||
static uint32_t *generate_YCbCr_to_RGB_lookup(void)
|
||||
static void process_image(video4linux_t *v4l, const uint8_t *buffer_yuv)
|
||||
{
|
||||
int y;
|
||||
int cb;
|
||||
int cr;
|
||||
|
||||
uint32_t *buffer = (uint32_t*)malloc(256 * 256 * 256 * sizeof(uint32_t));
|
||||
if (!buffer)
|
||||
return NULL;
|
||||
|
||||
for (y = 0; y < 256; y++)
|
||||
{
|
||||
for (cb = 0; cb < 256; cb++)
|
||||
{
|
||||
for (cr = 0; cr < 256; cr++)
|
||||
{
|
||||
double Y = (double)y;
|
||||
double Cb = (double)cb;
|
||||
double Cr = (double)cr;
|
||||
|
||||
int R = (int)(Y + 1.40200 * (Cr - 0x80));
|
||||
int G = (int)(Y - 0.34414 * (Cb - 0x80) - 0.71414 * (Cr - 0x80));
|
||||
int B = (int)(Y + 1.77200 * (Cb - 0x80));
|
||||
|
||||
R = max(0, min(255, R));
|
||||
G = max(0, min(255, G));
|
||||
B = max(0, min(255, B));
|
||||
|
||||
buffer[YUV_SHIFT(y, cb, cr)] = RGB_SHIFT(R, G, B);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return buffer;
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts YUV422 to RGB
|
||||
* Before first use call generate_YCbCr_to_RGB_lookup();
|
||||
*
|
||||
* input is pointer to YUV422 encoded data in following order: Y0, Cb, Y1, Cr.
|
||||
* output is pointer to 24 bit RGB buffer.
|
||||
* Output data is written in following order: R1, G1, B1, R2, G2, B2.
|
||||
*/
|
||||
|
||||
// FIXME: Software CPU color conersion from YUV to RGB - we'll make two codepaths
|
||||
// eventually - GL binding to texture and color conversion through shaders,
|
||||
// and this approach
|
||||
|
||||
static inline void YUV422_to_RGB(uint32_t *output, const uint8_t *input, const uint32_t *lut)
|
||||
{
|
||||
uint8_t y0 = input[0];
|
||||
uint8_t cb = input[1];
|
||||
uint8_t y1 = input[2];
|
||||
uint8_t cr = input[3];
|
||||
|
||||
output[0] = lut[YUV_SHIFT(y0, cb, cr)];
|
||||
output[1] = lut[YUV_SHIFT(y1, cb, cr)];
|
||||
}
|
||||
|
||||
static void process_image(void *data, const uint8_t *buffer_yuv)
|
||||
{
|
||||
RARCH_PERFORMANCE_INIT(yuv_convert);
|
||||
RARCH_PERFORMANCE_START(yuv_convert);
|
||||
video4linux_t *v4l = (video4linux_t*)data;
|
||||
const uint32_t *lut = v4l->YCbCr_to_RGB;
|
||||
uint32_t *dst = v4l->buffer_output;
|
||||
size_t x, y;
|
||||
|
||||
for (y = 0; y < v4l->height; y++, dst += v4l->width, buffer_yuv += v4l->width * 2)
|
||||
for (x = 0; x < v4l->width; x += 2)
|
||||
YUV422_to_RGB(dst + x, buffer_yuv + x * 2, lut);
|
||||
|
||||
RARCH_PERFORMANCE_STOP(yuv_convert);
|
||||
RARCH_PERFORMANCE_INIT(yuv_convert_direct);
|
||||
RARCH_PERFORMANCE_START(yuv_convert_direct);
|
||||
scaler_ctx_scale(&v4l->scaler, v4l->buffer_output, buffer_yuv);
|
||||
RARCH_PERFORMANCE_STOP(yuv_convert_direct);
|
||||
}
|
||||
|
||||
static int xioctl(int fd, int request, void *args)
|
||||
|
@ -239,7 +160,7 @@ static bool init_device(void *data)
|
|||
unsigned min;
|
||||
video4linux_t *v4l = (video4linux_t*)data;
|
||||
|
||||
if (xioctl(v4l->fd, VIDIOC_QUERYCAP, &cap) == -1)
|
||||
if (xioctl(v4l->fd, VIDIOC_QUERYCAP, &cap) < 0)
|
||||
{
|
||||
if (errno == EINVAL)
|
||||
{
|
||||
|
@ -265,29 +186,15 @@ static bool init_device(void *data)
|
|||
return false;
|
||||
}
|
||||
|
||||
/* Select video input, video standard and tune here. */
|
||||
|
||||
memset(&cropcap, 0, sizeof(cropcap));
|
||||
|
||||
cropcap.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
|
||||
|
||||
if (xioctl(v4l->fd, VIDIOC_CROPCAP, &cropcap) == 0)
|
||||
{
|
||||
crop.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
|
||||
crop.c = cropcap.defrect; /* reset to default */
|
||||
|
||||
if (xioctl(v4l->fd, VIDIOC_S_CROP, &crop) == -1)
|
||||
{
|
||||
switch (errno)
|
||||
{
|
||||
case EINVAL:
|
||||
/* Cropping not supported. */
|
||||
break;
|
||||
default:
|
||||
/* Errors ignored. */
|
||||
break;
|
||||
}
|
||||
}
|
||||
crop.c = cropcap.defrect;
|
||||
// Ignore errors here.
|
||||
xioctl(v4l->fd, VIDIOC_S_CROP, &crop);
|
||||
}
|
||||
|
||||
memset(&fmt, 0, sizeof(fmt));
|
||||
|
@ -295,41 +202,42 @@ static bool init_device(void *data)
|
|||
fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
|
||||
fmt.fmt.pix.width = v4l->width;
|
||||
fmt.fmt.pix.height = v4l->height;
|
||||
// TODO: See if we can use a saner format here.
|
||||
fmt.fmt.pix.pixelformat = V4L2_PIX_FMT_YUYV;
|
||||
fmt.fmt.pix.field = V4L2_FIELD_INTERLACED;
|
||||
fmt.fmt.pix.field = V4L2_FIELD_NONE;
|
||||
|
||||
if (xioctl(v4l->fd, VIDIOC_S_FMT, &fmt) == -1)
|
||||
if (xioctl(v4l->fd, VIDIOC_S_FMT, &fmt) < 0)
|
||||
{
|
||||
RARCH_ERR("Error - VIDIOC_S_FMT\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Note VIDIOC_S_FMT may change width and height. */
|
||||
// VIDIOC_S_FMT may change width, height and pitch.
|
||||
v4l->width = fmt.fmt.pix.width;
|
||||
v4l->height = fmt.fmt.pix.height;
|
||||
v4l->pitch = max(fmt.fmt.pix.bytesperline, v4l->width * 2);
|
||||
|
||||
/* Buggy driver paranoia. */
|
||||
min = fmt.fmt.pix.width * 2;
|
||||
if (fmt.fmt.pix.bytesperline < min)
|
||||
fmt.fmt.pix.bytesperline = min;
|
||||
min = fmt.fmt.pix.bytesperline * fmt.fmt.pix.height;
|
||||
if (fmt.fmt.pix.sizeimage < min)
|
||||
fmt.fmt.pix.sizeimage = min;
|
||||
// Sanity check to see if our assumptions are met.
|
||||
// It is possible to support whatever the device gives us,
|
||||
// but this dramatically increases complexity.
|
||||
if (fmt.fmt.pix.pixelformat != V4L2_PIX_FMT_YUYV)
|
||||
{
|
||||
RARCH_ERR("The V4L2 device doesn't support YUYV.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (fmt.fmt.pix.width != v4l->width)
|
||||
v4l->width = fmt.fmt.pix.width;
|
||||
|
||||
if (fmt.fmt.pix.height != v4l->height)
|
||||
v4l->height = fmt.fmt.pix.height;
|
||||
if (fmt.fmt.pix.field != V4L2_FIELD_NONE && fmt.fmt.pix.field != V4L2_FIELD_INTERLACED)
|
||||
{
|
||||
RARCH_ERR("The V4L2 device doesn't support progressive nor interlaced video.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
return init_mmap(v4l);
|
||||
}
|
||||
|
||||
static void v4l_stop(void *data)
|
||||
{
|
||||
enum v4l2_buf_type type;
|
||||
video4linux_t *v4l = (video4linux_t*)data;
|
||||
|
||||
type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
|
||||
enum v4l2_buf_type type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
|
||||
|
||||
if (xioctl(v4l->fd, VIDIOC_STREAMOFF, &type) == -1)
|
||||
RARCH_ERR("Error - VIDIOC_STREAMOFF.\n");
|
||||
|
@ -385,9 +293,8 @@ static void v4l_free(void *data)
|
|||
if (v4l->fd >= 0)
|
||||
close(v4l->fd);
|
||||
|
||||
free(v4l->YCbCr_to_RGB);
|
||||
free(v4l->buffer_output);
|
||||
|
||||
scaler_ctx_gen_reset(&v4l->scaler);
|
||||
free(v4l);
|
||||
}
|
||||
|
||||
|
@ -441,10 +348,16 @@ static void *v4l_init(const char *device, uint64_t caps, unsigned width, unsigne
|
|||
goto error;
|
||||
}
|
||||
|
||||
v4l->YCbCr_to_RGB = generate_YCbCr_to_RGB_lookup();
|
||||
if (!v4l->YCbCr_to_RGB)
|
||||
v4l->scaler.in_width = v4l->scaler.out_width = v4l->width;
|
||||
v4l->scaler.in_height = v4l->scaler.out_height = v4l->height;
|
||||
v4l->scaler.in_fmt = SCALER_FMT_YUYV;
|
||||
v4l->scaler.out_fmt = SCALER_FMT_ARGB8888;
|
||||
v4l->scaler.in_stride = v4l->pitch;
|
||||
v4l->scaler.out_stride = v4l->width * 4;
|
||||
|
||||
if (!scaler_ctx_gen_filter(&v4l->scaler))
|
||||
{
|
||||
RARCH_ERR("Failed to create YUV->RGB LUT.\n");
|
||||
RARCH_ERR("Failed to create scaler.\n");
|
||||
goto error;
|
||||
}
|
||||
|
||||
|
@ -473,11 +386,6 @@ static bool preprocess_image(void *data)
|
|||
{
|
||||
case EAGAIN:
|
||||
return false;
|
||||
case EIO:
|
||||
/* Could ignore EIO, see spec. */
|
||||
|
||||
/* fall through */
|
||||
|
||||
default:
|
||||
RARCH_ERR("VIDIOC_DQBUF.\n");
|
||||
return false;
|
||||
|
@ -521,3 +429,4 @@ const camera_driver_t camera_v4l2 = {
|
|||
v4l_poll,
|
||||
"video4linux2",
|
||||
};
|
||||
|
||||
|
|
|
@ -672,6 +672,50 @@ void conv_argb8888_abgr8888(void *output_, const void *input_,
|
|||
}
|
||||
}
|
||||
|
||||
static inline uint8_t clamp_8bit(int val)
|
||||
{
|
||||
if (val > 255)
|
||||
return 255;
|
||||
else if (val < 0)
|
||||
return 0;
|
||||
else
|
||||
return val;
|
||||
}
|
||||
|
||||
void conv_yuyv_argb8888(void *output_, const void *input_,
|
||||
int width, int height,
|
||||
int out_stride, int in_stride)
|
||||
{
|
||||
int h, w;
|
||||
const uint8_t *input = (const uint8_t*)input_;
|
||||
uint32_t *output = (uint32_t*)output_;
|
||||
|
||||
for (h = 0; h < height; h++, output += out_stride >> 2, input += in_stride)
|
||||
{
|
||||
const uint8_t *src = input;
|
||||
uint32_t *dst = output;
|
||||
|
||||
for (w = 0; w < width; w += 2, src += 4, dst += 2)
|
||||
{
|
||||
int y0 = src[0] - 16;
|
||||
int u = src[1] - 128;
|
||||
int y1 = src[2] - 16;
|
||||
int v = src[3] - 128;
|
||||
|
||||
uint8_t r0 = clamp_8bit((298 * y0 + 409 * v + 128) >> 8);
|
||||
uint8_t g0 = clamp_8bit((298 * y0 - 100 * u - 208 * v + 128) >> 8);
|
||||
uint8_t b0 = clamp_8bit((298 * y0 + 516 * u + 128) >> 8);
|
||||
|
||||
uint8_t r1 = clamp_8bit((298 * y1 + 409 * v + 128) >> 8);
|
||||
uint8_t g1 = clamp_8bit((298 * y1 - 100 * u - 208 * v + 128) >> 8);
|
||||
uint8_t b1 = clamp_8bit((298 * y1 + 516 * u + 128) >> 8);
|
||||
|
||||
dst[0] = 0xff000000u | (r0 << 16) | (g0 << 8) | (b0 << 0);
|
||||
dst[1] = 0xff000000u | (r1 << 16) | (g1 << 8) | (b1 << 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void conv_copy(void *output_, const void *input_,
|
||||
int width, int height,
|
||||
int out_stride, int in_stride)
|
||||
|
|
|
@ -60,6 +60,10 @@ void conv_rgb565_bgr24(void *output, const void *input,
|
|||
int width, int height,
|
||||
int out_stride, int in_stride);
|
||||
|
||||
void conv_yuyv_argb8888(void *output, const void *input,
|
||||
int width, int height,
|
||||
int out_stride, int in_stride);
|
||||
|
||||
void conv_copy(void *output, const void *input,
|
||||
int width, int height,
|
||||
int out_stride, int in_stride);
|
||||
|
|
|
@ -86,6 +86,8 @@ static bool set_direct_pix_conv(struct scaler_ctx *ctx)
|
|||
ctx->direct_pixconv = conv_0rgb1555_bgr24;
|
||||
else if (ctx->in_fmt == SCALER_FMT_ARGB8888 && ctx->out_fmt == SCALER_FMT_ABGR8888)
|
||||
ctx->direct_pixconv = conv_argb8888_abgr8888;
|
||||
else if (ctx->in_fmt == SCALER_FMT_YUYV && ctx->out_fmt == SCALER_FMT_ARGB8888)
|
||||
ctx->direct_pixconv = conv_yuyv_argb8888;
|
||||
else
|
||||
return false;
|
||||
|
||||
|
|
|
@ -28,7 +28,8 @@ enum scaler_pix_fmt
|
|||
SCALER_FMT_ABGR8888,
|
||||
SCALER_FMT_0RGB1555,
|
||||
SCALER_FMT_RGB565,
|
||||
SCALER_FMT_BGR24
|
||||
SCALER_FMT_BGR24,
|
||||
SCALER_FMT_YUYV
|
||||
};
|
||||
|
||||
enum scaler_type
|
||||
|
|
Loading…
Add table
Reference in a new issue