2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
2949
2950
2951
2952
2953
2954
2955
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
2967
2968
2969
2970
2971
2972
2973
2974
2975
2976
2977
2978
2979
2980
2981
2982
2983
2984
2985
2986
2987
2988
2989
2990
2991
2992
2993
2994
2995
2996
2997
2998
2999
3000
3001
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064
3065
3066
3067
3068
3069
3070
3071
3072
3073
3074
3075
3076
3077
3078
3079
3080
3081
3082
3083
3084
3085
3086
3087
3088
3089
3090
3091
3092
3093
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111
3112
3113
3114
3115
3116
3117
3118
3119
3120
3121
3122
3123
3124
3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151
3152
3153
3154
3155
3156
3157
3158
|
# File 'ext/multi_compress/multi_compress.c', line 2883
/*
 * inflater_write(self, chunk) -> String (binary)
 *
 * Feeds +chunk+ into the streaming decompressor selected by inf->algo and
 * returns whatever decompressed bytes became available.  Raises
 * eStreamError if the stream is closed and eDataError on corrupt input or
 * when the configured output-size / compression-ratio limits are exceeded.
 *
 * Accounting contract: inf->total_input / inf->total_output are committed
 * only after a chunk is fully processed; inside the loops the limit checks
 * use provisional sums (input_accounted_before + consumed, total_output +
 * result_len) so an over-limit chunk raises before state is mutated.
 *
 * NOTE(review): fiber dispatch (select_fiber_or_direct_mode /
 * RUN_VIA_FIBER_WORKER) presumably offloads the nogvl work to a worker so
 * a fiber scheduler can make progress — behavior assumed from the helper
 * names; confirm against their definitions.
 */
static VALUE inflater_write(VALUE self, VALUE chunk) {
  inflater_t *inf;
  TypedData_Get_Struct(self, inflater_t, &inflater_type, inf);
  if (inf->closed)
    rb_raise(eStreamError, "stream is closed");
  StringValue(chunk);
  const char *src = RSTRING_PTR(chunk);
  size_t slen = RSTRING_LEN(chunk);
  const algo_policy_t *policy = algo_policy(inf->algo);
  if (slen == 0)
    return rb_binary_str_new("", 0);
  /* Snapshot so provisional input totals can be recomputed per iteration. */
  size_t input_accounted_before = inf->total_input;
  switch (inf->algo) {
  case ALGO_ZSTD: {
    ZSTD_inBuffer input = {src, slen, 0};
    size_t out_cap = ZSTD_DStreamOutSize();
    /* Initial guess: at least one recommended zstd output block, or 2x the
     * input, clamped to the remaining global output budget. */
    size_t result_cap = out_cap > slen * 2 ? out_cap : slen * 2;
    size_t remaining_total_budget =
        inf->max_output_size > inf->total_output ? inf->max_output_size - inf->total_output : 0;
    if (remaining_total_budget == 0)
      rb_raise(eDataError, "decompressed output exceeds limit (%zu bytes)",
               inf->max_output_size);
    if (result_cap > remaining_total_budget)
      result_cap = remaining_total_budget;
    VALUE result = rb_binary_str_buf_reserve(result_cap);
    size_t result_len = 0;
    VALUE scheduler = current_fiber_scheduler();
    while (input.pos < input.size) {
      /* Budget left for this call = global budget minus bytes already
       * produced into `result` during this call. */
      size_t remaining_budget = inf->max_output_size - inf->total_output - result_len;
      if (remaining_budget == 0)
        rb_raise(eDataError, "decompressed output exceeds limit (%zu bytes)",
                 inf->max_output_size);
      if (result_len + out_cap > result_cap) {
        /* Grow geometrically, never past the remaining global budget. */
        size_t next_cap = result_cap * 2;
        if (next_cap > inf->max_output_size - inf->total_output)
          next_cap = inf->max_output_size - inf->total_output;
        result_cap = next_cap;
        rb_str_resize(result, result_cap);
      }
      size_t current_out_cap = out_cap;
      if (current_out_cap > remaining_budget)
        current_out_cap = remaining_budget;
      /* RSTRING_PTR is re-read every iteration: rb_str_resize may move it. */
      ZSTD_outBuffer output = {RSTRING_PTR(result) + result_len, current_out_cap, 0};
      size_t ret;
      if (select_fiber_or_direct_mode(scheduler, input.size - input.pos,
                                      policy->fiber_stream_threshold) == WORK_EXEC_FIBER) {
        zstd_decompress_stream_chunk_fiber_t args = {
            .dstream = inf->ctx.zstd,
            .output = output,
            .input = input,
            .result = 0,
        };
        RUN_VIA_FIBER_WORKER(zstd_decompress_stream_chunk_fiber_nogvl, args);
        output.pos = args.output.pos;
        input.pos = args.input.pos;
        ret = args.result;
      } else {
        ret = ZSTD_decompressStream(inf->ctx.zstd, &output, &input);
      }
      if (ZSTD_isError(ret))
        rb_raise(eDataError, "zstd decompress stream: %s", ZSTD_getErrorName(ret));
      result_len = checked_add_size(result_len, output.pos,
                                    "decompressed output exceeds representable size");
      /* Provisional totals: enforce limits before committing any state. */
      size_t total_output = checked_add_size(
          inf->total_output, result_len, "decompressed output exceeds representable size");
      size_t total_input = checked_add_size(input_accounted_before, input.pos,
                                            "compressed input exceeds representable size");
      enforce_output_and_ratio_limits(total_output, total_input, inf->max_output_size,
                                      inf->max_ratio_enabled, inf->max_ratio);
      if (ret == 0) /* frame complete */
        break;
    }
    inf->total_input = checked_add_size(input_accounted_before, input.pos,
                                        "compressed input exceeds representable size");
    inf->total_output = checked_add_size(inf->total_output, result_len,
                                         "decompressed output exceeds representable size");
    rb_str_set_len(result, result_len);
    RB_GC_GUARD(chunk);
    return result;
  }
  case ALGO_BROTLI: {
    size_t available_in = slen;
    const uint8_t *next_in = (const uint8_t *)src;
    size_t remaining_total_budget =
        inf->max_output_size > inf->total_output ? inf->max_output_size - inf->total_output : 0;
    if (remaining_total_budget == 0)
      rb_raise(eDataError, "decompressed output exceeds limit (%zu bytes)",
               inf->max_output_size);
    size_t result_cap = slen * 2;
    if (result_cap < 1024)
      result_cap = 1024;
    if (result_cap > remaining_total_budget)
      result_cap = remaining_total_budget;
    VALUE result = rb_binary_str_buf_reserve(result_cap);
    size_t result_len = 0;
    VALUE scheduler = current_fiber_scheduler();
    while (available_in > 0 || BrotliDecoderHasMoreOutput(inf->ctx.brotli)) {
      /* available_out==0/next_out==NULL: let brotli buffer internally and
       * drain it below with BrotliDecoderTakeOutput. */
      size_t available_out = 0;
      uint8_t *next_out = NULL;
      BrotliDecoderResult res;
      if (select_fiber_or_direct_mode(scheduler, available_in,
                                      policy->fiber_stream_threshold) == WORK_EXEC_FIBER) {
        brotli_decompress_stream_fiber_t sargs = {
            .dec = inf->ctx.brotli,
            .available_in = available_in,
            .next_in = next_in,
            .available_out = available_out,
            .next_out = next_out,
            .result = BROTLI_DECODER_RESULT_ERROR,
        };
        RUN_VIA_FIBER_WORKER(brotli_decompress_stream_fiber_nogvl, sargs);
        available_in = sargs.available_in;
        next_in = sargs.next_in;
        available_out = sargs.available_out;
        next_out = sargs.next_out;
        res = sargs.result;
      } else {
        res = BrotliDecoderDecompressStream(inf->ctx.brotli, &available_in, &next_in,
                                            &available_out, &next_out, NULL);
      }
      if (res == BROTLI_DECODER_RESULT_ERROR)
        rb_raise(eDataError, "brotli decompress stream: %s",
                 BrotliDecoderErrorString(BrotliDecoderGetErrorCode(inf->ctx.brotli)));
      const uint8_t *out_data;
      size_t out_size = 0;
      out_data = BrotliDecoderTakeOutput(inf->ctx.brotli, &out_size);
      if (out_size > 0) {
        /* Enforce limits on the provisional totals BEFORE buffering the
         * new output, so an over-limit chunk raises without copying. */
        size_t total_output = checked_add_size(
            inf->total_output,
            checked_add_size(result_len, out_size,
                             "decompressed output exceeds representable size"),
            "decompressed output exceeds representable size");
        size_t total_input =
            checked_add_size(input_accounted_before, slen - available_in,
                             "compressed input exceeds representable size");
        enforce_output_and_ratio_limits(total_output, total_input, inf->max_output_size,
                                        inf->max_ratio_enabled, inf->max_ratio);
        if (result_len + out_size > result_cap) {
          result_cap = result_len + out_size;
          rb_str_resize(result, result_cap);
        }
        memcpy(RSTRING_PTR(result) + result_len, out_data, out_size);
        result_len += out_size;
      }
      if (res == BROTLI_DECODER_RESULT_SUCCESS)
        break;
      if (res == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT && available_in == 0)
        break;
    }
    inf->total_input = checked_add_size(input_accounted_before, slen - available_in,
                                        "compressed input exceeds representable size");
    inf->total_output = checked_add_size(inf->total_output, result_len,
                                         "decompressed output exceeds representable size");
    rb_str_set_len(result, result_len);
    RB_GC_GUARD(chunk);
    return result;
  }
  case ALGO_LZ4: {
    /* Framing: [u32le orig_size][u32le comp_size][comp_size bytes]...,
     * terminated by orig_size == 0.  Partial frames are carried over in
     * lz4_buf between calls; `offset` is the consumed prefix. */
    size_t data_len = inf->lz4_buf.len - inf->lz4_buf.offset;
    size_t needed =
        checked_add_size(data_len, slen, "lz4 stream input buffer exceeds representable size");
    if (needed > inf->lz4_buf.cap) {
      size_t new_cap = needed > SIZE_MAX / 2 ? needed : needed * 2;
      if (inf->lz4_buf.offset > 0) {
        /* Grow and compact in one pass; offset resets to 0. */
        char *new_buf = ALLOC_N(char, new_cap);
        if (data_len > 0)
          memcpy(new_buf, inf->lz4_buf.buf + inf->lz4_buf.offset, data_len);
        xfree(inf->lz4_buf.buf);
        inf->lz4_buf.buf = new_buf;
        inf->lz4_buf.offset = 0;
        inf->lz4_buf.len = data_len;
        inf->lz4_buf.cap = new_cap;
      } else {
        REALLOC_N(inf->lz4_buf.buf, char, new_cap);
        inf->lz4_buf.cap = new_cap;
      }
    } else if (inf->lz4_buf.offset > 0 &&
               (checked_add_size(inf->lz4_buf.len, slen,
                                 "lz4 stream input buffer exceeds representable size") >
                    inf->lz4_buf.cap ||
                inf->lz4_buf.offset > inf->lz4_buf.cap / 2)) {
      /* BUGFIX: compaction must also run whenever the chunk would not fit
       * at the current tail (len + slen > cap), not only when the consumed
       * prefix exceeds the cap/2 heuristic.  Previously, with
       * needed <= cap but offset <= cap/2 and len + slen > cap (e.g.
       * cap=100, offset=40, len=70, slen=50), the memcpy below wrote past
       * the end of the allocation — a heap buffer overflow. */
      if (data_len > 0)
        memmove(inf->lz4_buf.buf, inf->lz4_buf.buf + inf->lz4_buf.offset, data_len);
      inf->lz4_buf.offset = 0;
      inf->lz4_buf.len = data_len;
    }
    /* Safe now: either cap >= offset + needed, or offset was reset to 0
     * with cap >= needed. */
    memcpy(inf->lz4_buf.buf + inf->lz4_buf.len, src, slen);
    inf->lz4_buf.len += slen;
    size_t remaining_total_budget =
        inf->max_output_size > inf->total_output ? inf->max_output_size - inf->total_output : 0;
    if (remaining_total_budget == 0)
      rb_raise(eDataError, "decompressed output exceeds limit (%zu bytes)",
               inf->max_output_size);
    size_t result_cap = slen * 4;
    if (result_cap < 256)
      result_cap = 256;
    if (result_cap > remaining_total_budget)
      result_cap = remaining_total_budget;
    VALUE result = rb_binary_str_buf_new(result_cap);
    size_t result_len = 0;
    int use_fiber = has_fiber_scheduler();
    size_t fiber_counter = 0;
    size_t pos = inf->lz4_buf.offset;
    /* Decode every complete frame currently buffered. */
    while (pos + 4 <= inf->lz4_buf.len) {
      const uint8_t *p = (const uint8_t *)(inf->lz4_buf.buf + pos);
      uint32_t orig_size = read_le_u32(p);
      if (orig_size == 0) {
        /* End-of-stream marker. */
        inf->finished = 1;
        pos += 4;
        break;
      }
      if (pos + 8 > inf->lz4_buf.len)
        break; /* header incomplete; wait for more input */
      uint32_t comp_size = read_le_u32(p + 4);
      if (pos + 8 + comp_size > inf->lz4_buf.len)
        break; /* body incomplete; wait for more input */
      if (orig_size > 64 * 1024 * 1024)
        rb_raise(eDataError, "lz4 stream: block too large (%u)", orig_size);
      /* Provisional totals; raises before decompressing the block. */
      size_t total_output =
          checked_add_size(inf->total_output,
                           checked_add_size(result_len, orig_size,
                                            "decompressed output exceeds representable size"),
                           "decompressed output exceeds representable size");
      size_t total_input = checked_add_size(
          input_accounted_before, (pos + 8 + (size_t)comp_size) - inf->lz4_buf.offset,
          "compressed input exceeds representable size");
      enforce_output_and_ratio_limits(total_output, total_input, inf->max_output_size,
                                      inf->max_ratio_enabled, inf->max_ratio);
      if (result_len + orig_size > result_cap) {
        result_cap = result_len + orig_size;
        rb_str_resize(result, result_cap);
      }
      int dsize =
          LZ4_decompress_safe(inf->lz4_buf.buf + pos + 8, RSTRING_PTR(result) + result_len,
                              (int)comp_size, (int)orig_size);
      if (dsize < 0)
        rb_raise(eDataError, "lz4 stream decompress block failed");
      result_len += dsize;
      pos += 8 + comp_size;
      if (use_fiber) {
        /* Cooperatively yield to the fiber scheduler between blocks. */
        int did_yield = 0;
        fiber_counter = fiber_maybe_yield(fiber_counter, (size_t)dsize,
                                          policy->fiber_yield_chunk, &did_yield);
        (void)did_yield;
      }
    }
    inf->total_input = checked_add_size(input_accounted_before, pos - inf->lz4_buf.offset,
                                        "compressed input exceeds representable size");
    inf->lz4_buf.offset = pos;
    inf->total_output = checked_add_size(inf->total_output, result_len,
                                         "decompressed output exceeds representable size");
    rb_str_set_len(result, result_len);
    RB_GC_GUARD(chunk);
    return result;
  }
  }
  return rb_binary_str_new("", 0);
}
|