Как применить пользовательскую функцию к нескольким столбцам в BigQuery SQL?

В базе данных, над которой я работаю, есть несколько переменных заработной платы, которые записываются в виде строк с такими записями, как 0000001155,00. Я использую комбинацию CAST и REPLACE для преобразования этих переменных в число с плавающей запятой. Только для одной переменной я использовал:

 -- Swap the decimal comma for a dot, then cast the string to FLOAT64.
 CAST(REPLACE(wage_var, ",", ".") AS FLOAT64) AS wage_formatted

Я хотел бы выполнить эту процедуру для всех переменных, имеющих одну и ту же проблему, без повторения одной и той же строки кода. Моя идея состоит в том, чтобы использовать функцию, а затем повторять функцию по столбцам.

Прочитав документацию, я разобрался, как создать функцию для такого преобразования, и написал следующее:

-- wage2float: turns a decimal-comma string (e.g. "0000001155,00") into FLOAT64
-- by replacing the comma with a dot and casting the result.
CREATE TEMP FUNCTION wage2float(x STRING) AS (
  CAST(REPLACE(x, ",", ".") AS FLOAT64)
);

-- Show the raw string next to its converted value.
SELECT
  wage_var,
  wage2float(wage_var) AS wage_formatted
FROM `mydataset.mytable`

Однако мне непонятно, как я могу повторить эту функцию для нескольких столбцов. Есть ли способ перебирать столбцы и применять функцию wage2float для каждого столбца?

Обновлено:

Вот пример ввода (csv):

vl_remun_media_nom,vl_remun_media_sm,vl_remun_dezembro_nom,vl_remun_dezembro_sm,vl_ultima_remuneracao_ano,vl_salario_contratual,vl_rem_janeiro_cc,vl_rem_fevereiro_cc,vl_rem_marco_cc,vl_rem_abril_cc,vl_rem_maio_cc,vl_rem_junho_cc,vl_rem_julho_cc,vl_rem_agosto_cc,vl_rem_setembro_cc,vl_rem_outubro_cc,vl_rem_novembro_cc
"0000006025,55","000006,42","0000005921,09","000006,31","0005921,09","0005148,77","000000005866,27","000000005866,27","000000005866,27","000000005866,27","000000005866,27","000000005866,27","000000007169,88","000000006254,78","000000005921,09","000000005921,09","000000005921,09"
"0000001447,68","000001,54","0000001726,67","000001,84","0001726,67","0000014,00","000000001645,55","000000000014,00","000000000014,00","000000000014,00","000000000014,00","000000000014,00","000000000014,00","000000000014,00","000000000014,00","000000000014,00","000000000014,00"
"0000001304,35","000001,39","0000001304,35","000001,39","0001304,35","0001304,35","000000000000,00","000000000000,00","000000000000,00","000000000000,00","000000000000,00","000000000000,00","000000000000,00","000000001304,35","000000001304,35","000000001304,35","000000001304,35"
"0000001447,68","000001,54","0000001726,67","000001,84","0001726,67","0000014,00","000000001645,55","000000000014,00","000000000014,00","000000000014,00","000000000014,00","000000000014,00","000000000014,00","000000000014,00","000000000014,00","000000000014,00","000000000014,00"
"0000001447,68","000001,54","0000001726,67","000001,84","0001726,67","0000014,00","000000001645,56","000000000014,00","000000000014,00","000000000014,00","000000000014,00","000000000014,00","000000000014,00","000000000014,00","000000000014,00","000000000014,00","000000000014,00"
"0000001447,68","000001,54","0000001726,67","000001,84","0001726,67","0000014,00","000000001645,55","000000000014,00","000000000014,00","000000000014,00","000000000014,00","000000000014,00","000000000014,00","000000000014,00","000000000014,00","000000000014,00","000000000014,00"
"0000001427,95","000001,52","0000001420,68","000001,51","0001420,68","0001420,68","000000001379,30","000000001379,30","000000001379,30","000000001379,30","000000001379,30","000000001379,30","000000001839,07","000000001379,30","000000001379,30","000000001420,68","000000001420,68"
"0000005937,88","000006,33","0000005900,00","000006,29","0005900,00","0000059,00","000000000057,38","000000000057,38","000000000057,38","000000000057,38","000000007650,67","000000000057,38","000000000057,38","000000000057,38","000000000057,38","000000000059,00","000000000059,00"
"0000001087,04","000001,15","0000001076,20","000001,14","0001076,20","0001076,20","000000000010,00","000000000010,00","000000000010,00","000000001076,20","000000001076,20","000000001076,20","000000001076,20","000000001434,93","000000001076,20","000000001076,20","000000001076,20"
"0000002395,30","000002,55","0000002448,79","000002,61","0002448,79","0002448,79","000000002377,47","000000002377,47","000000002377,47","000000002377,47","000000002377,47","000000002377,47","000000002377,47","000000002377,47","000000002377,47","000000002448,79","000000002448,79"
"0000001870,56","000001,99","0000001820,00","000001,94","0001820,00","0000018,00","000000001820,01","000000001820,01","000000001820,01","000000001820,01","000000001820,01","000000000018,20","000000000018,20","000000000018,20","000000000018,20","000000002426,67","000000000018,20"
"0000002960,08","000003,15","0000003068,59","000003,27","0003068,59","0000027,00","000000002724,53","000000002500,09","000000003454,64","000000002700,88","000000002943,15","000000002943,42","000000002943,69","000000003098,28","000000003098,24","000000002976,73","000000003068,79"
"0000003798,04","000004,04","0000003852,69","000004,11","0003852,69","0000030,00","000000002500,45","000000002500,57","000000002500,79","000000005306,55","000000005079,02","000000003430,02","000000004239,21","000000004182,29","000000004913,02","000000003247,38","000000003824,52"
"0000004945,06","000005,27","0000005286,81","000005,64","0005286,81","0000045,00","000000004000,10","000000004000,16","000000005392,43","000000004919,14","000000004500,98","000000004500,21","000000005936,10","000000006133,08","000000004795,43","000000004576,91","000000005299,44"
"0000005810,00","000006,19","0000005540,00","000005,91","0005540,00","0000055,40","000000006933,33","000000000055,40","000000000055,40","000000000055,40","000000000055,40","000000000055,40","000000000055,40","000000007386,67","000000000055,40","000000000055,40","000000000055,40"
"0000001103,62","000001,17","0000001090,00","000001,16","0001090,00","0000010,90","000000000010,31","000000000010,31","000000000010,31","000000001086,20","000000001086,20","000000001086,20","000000001086,20","000000001086,20","000000001086,20","000000001453,33","000000000010,90"
"0000002600,34","000002,77","0000002866,13","000003,05","0002866,13","0000010,91","000000000000,00","000000000000,00","000000000000,00","000000000000,00","000000002168,92","000000001999,70","000000002175,13","000000003036,83","000000002909,14","000000002887,45","000000002759,44"
"0000005174,66","000005,51","0000004967,86","000005,30","0004967,86","0000016,15","000000005154,31","000000004621,59","000000005161,25","000000005080,73","000000005185,34","000000004981,24","000000006430,29","000000005584,57","000000005064,43","000000005029,16","000000004835,26"
"0000005693,03","000006,07","0000005650,78","000006,03","0005650,78","0005650,78","000000005433,44","000000005433,44","000000005433,44","000000005433,44","000000007244,59","000000005433,44","000000005433,44","000000005868,12","000000005650,78","000000005650,78","000000005650,78"
"0000002485,76","000002,64","0000002810,52","000002,99","0002810,52","0000010,91","000000002193,56","000000001925,13","000000002352,46","000000002135,21","000000002440,66","000000002232,19","000000002951,81","000000002947,97","000000002588,45","000000002516,61","000000002734,59"
"0000003808,35","000004,06","0000003893,40","000004,15","0003893,40","0003893,40","000000000037,80","000000000037,80","000000000037,80","000000000037,80","000000000037,80","000000000037,80","000000000037,80","000000000037,80","000000000037,80","000000000037,80","000000004006,80"
"0000004648,00","000004,95","0000004549,71","000004,85","0004549,71","0004549,71","000000004212,70","000000004549,71","000000006066,28","000000004549,71","000000004549,71","000000004549,71","000000004549,71","000000004549,71","000000004549,71","000000004549,71","000000004549,71"
"0000004521,62","000004,82","0000004549,71","000004,85","0004549,71","0004549,71","000000004212,70","000000004549,71","000000004549,71","000000004549,71","000000004549,71","000000004549,71","000000004549,71","000000004549,71","000000004549,71","000000004549,71","000000004549,71"
"0000003024,00","000003,22","0000003024,00","000003,22","0003024,00","0000030,24","000000000028,00","000000000028,00","000000000028,00","000000000028,00","000000000039,20","000000000030,24","000000000030,24","000000000030,24","000000000030,24","000000000030,24","000000000030,24"
"0000002946,43","000003,14","0000002910,00","000003,10","0002910,00","0001923,68","000000000000,00","000000000000,00","000000000000,00","000000000000,00","000000000000,00","000000000000,00","000000000000,00","000000000000,00","000000000000,00","000000002983,70","000000002945,59"

Желаемый результат:

vl_remun_media_nom,vl_remun_media_sm,vl_remun_dezembro_nom,vl_remun_dezembro_sm,vl_ultima_remuneracao_ano,vl_salario_contratual,vl_rem_janeiro_cc,vl_rem_fevereiro_cc,vl_rem_marco_cc,vl_rem_abril_cc,vl_rem_maio_cc,vl_rem_junho_cc,vl_rem_julho_cc,vl_rem_agosto_cc,vl_rem_setembro_cc,vl_rem_outubro_cc,vl_rem_novembro_cc
6025.55,6.42,5921.09,6.31,5921.09,5148.77,5866.27,5866.27,5866.27,5866.27,5866.27,5866.27,7169.88,6254.78,5921.09,5921.09,5921.09
1447.68,1.54,1726.67,1.84,1726.67,14.0,1645.55,14.0,14.0,14.0,14.0,14.0,14.0,14.0,14.0,14.0,14.0
1304.35,1.39,1304.35,1.39,1304.35,1304.35,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1304.35,1304.35,1304.35,1304.35
1447.68,1.54,1726.67,1.84,1726.67,14.0,1645.55,14.0,14.0,14.0,14.0,14.0,14.0,14.0,14.0,14.0,14.0
1447.68,1.54,1726.67,1.84,1726.67,14.0,1645.56,14.0,14.0,14.0,14.0,14.0,14.0,14.0,14.0,14.0,14.0
1447.68,1.54,1726.67,1.84,1726.67,14.0,1645.55,14.0,14.0,14.0,14.0,14.0,14.0,14.0,14.0,14.0,14.0
1427.95,1.52,1420.68,1.51,1420.68,1420.68,1379.3,1379.3,1379.3,1379.3,1379.3,1379.3,1839.07,1379.3,1379.3,1420.68,1420.68
5937.88,6.33,5900.0,6.29,5900.0,59.0,57.38,57.38,57.38,57.38,7650.67,57.38,57.38,57.38,57.38,59.0,59.0
1087.04,1.15,1076.2,1.14,1076.2,1076.2,10.0,10.0,10.0,1076.2,1076.2,1076.2,1076.2,1434.93,1076.2,1076.2,1076.2
2395.3,2.55,2448.79,2.61,2448.79,2448.79,2377.47,2377.47,2377.47,2377.47,2377.47,2377.47,2377.47,2377.47,2377.47,2448.79,2448.79
1870.56,1.99,1820.0,1.94,1820.0,18.0,1820.01,1820.01,1820.01,1820.01,1820.01,18.2,18.2,18.2,18.2,2426.67,18.2
2960.08,3.15,3068.59,3.27,3068.59,27.0,2724.53,2500.09,3454.64,2700.88,2943.15,2943.42,2943.69,3098.28,3098.24,2976.73,3068.79
3798.04,4.04,3852.69,4.11,3852.69,30.0,2500.45,2500.57,2500.79,5306.55,5079.02,3430.02,4239.21,4182.29,4913.02,3247.38,3824.52
4945.06,5.27,5286.81,5.64,5286.81,45.0,4000.1,4000.16,5392.43,4919.14,4500.98,4500.21,5936.1,6133.08,4795.43,4576.91,5299.44
5810.0,6.19,5540.0,5.91,5540.0,55.4,6933.33,55.4,55.4,55.4,55.4,55.4,55.4,7386.67,55.4,55.4,55.4
1103.62,1.17,1090.0,1.16,1090.0,10.9,10.31,10.31,10.31,1086.2,1086.2,1086.2,1086.2,1086.2,1086.2,1453.33,10.9
2600.34,2.77,2866.13,3.05,2866.13,10.91,0.0,0.0,0.0,0.0,2168.92,1999.7,2175.13,3036.83,2909.14,2887.45,2759.44
5174.66,5.51,4967.86,5.3,4967.86,16.15,5154.31,4621.59,5161.25,5080.73,5185.34,4981.24,6430.29,5584.57,5064.43,5029.16,4835.26
5693.03,6.07,5650.78,6.03,5650.78,5650.78,5433.44,5433.44,5433.44,5433.44,7244.59,5433.44,5433.44,5868.12,5650.78,5650.78,5650.78
2485.76,2.64,2810.52,2.99,2810.52,10.91,2193.56,1925.13,2352.46,2135.21,2440.66,2232.19,2951.81,2947.97,2588.45,2516.61,2734.59
3808.35,4.06,3893.4,4.15,3893.4,3893.4,37.8,37.8,37.8,37.8,37.8,37.8,37.8,37.8,37.8,37.8,4006.8
4648.0,4.95,4549.71,4.85,4549.71,4549.71,4212.7,4549.71,6066.28,4549.71,4549.71,4549.71,4549.71,4549.71,4549.71,4549.71,4549.71
4521.62,4.82,4549.71,4.85,4549.71,4549.71,4212.7,4549.71,4549.71,4549.71,4549.71,4549.71,4549.71,4549.71,4549.71,4549.71,4549.71
3024.0,3.22,3024.0,3.22,3024.0,30.24,28.0,28.0,28.0,28.0,39.2,30.24,30.24,30.24,30.24,30.24,30.24
2946.43,3.14,2910.0,3.1,2910.0,1923.68,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2983.7,2945.59

Пожалуйста, предоставьте образец входных данных и соответствующий ожидаемый результат - см. Как создать минимальный воспроизводимый пример

Mikhail Berlyant 20.12.2020 20:56

Вы хотите, чтобы так обрабатывались ВСЕ столбцы, или хотите задать список столбцов, которые нужно обработать? Оба варианта легко реализовать, но решения немного отличаются, так что дайте знать, какой из них ваш.

Mikhail Berlyant 20.12.2020 21:09

Нет. Только столбцы, начинающиеся с vl. Есть несколько других переменных, которые не нуждались в этой процедуре.

Lucas 20.12.2020 21:15

Хорошо. Должно быть просто :о) - ответ опубликую позже в течение дня

Mikhail Berlyant 20.12.2020 21:16
ReactJs | Supabase | Добавление данных в базу данных
ReactJs | Supabase | Добавление данных в базу данных
Это и есть ваш редактор таблиц в supabase.👇
Понимание Python и переход к SQL
Понимание Python и переход к SQL
Перед нами лабораторная работа по BloodOath:
2
4
1 020
2
Перейти к ответу Данный вопрос помечен как решенный

Ответы 2

Если вам нужен запрос select, вы просто используете:

-- Plain query: each column to convert gets its own CAST(REPLACE(...)) expression.
-- The ". . ." line is a placeholder for further columns, not valid SQL.
SELECT CAST(REPLACE(wage_var, ',', '.') AS float64) as wage_formatted,
       CAST(REPLACE(taxes_var, ',', '.') AS float64) as taxes_formatted,
       . . . 
FROM t;

Если вы хотите сделать это «навсегда»... ну, я бы предложил представление (view):

-- View v_t: every column of t plus a converted *_formatted column for each listed string column.
-- The ". . ." line is a placeholder for further columns, not valid SQL.
CREATE VIEW v_t AS
    SELECT t.*,
           CAST(REPLACE(wage_var, ',', '.') AS float64) as wage_formatted,
           CAST(REPLACE(taxes_var, ',', '.') AS float64) as taxes_formatted,
           . . . 
    FROM t;

Вы также можете добавить в таблицу новые столбцы и присвоить им значение с плавающей запятой.

Хорошо. Но нужно ли повторять строки? Я имею в виду, что нет способа создать список столбцов, а затем выполнить итерацию по списку?

Lucas 20.12.2020 19:46
Ответ принят как подходящий
Только столбцы, начинающиеся с vl. Есть несколько других переменных, которые не нуждались в этой процедуре.

Ниже приведен стандартный SQL BigQuery с использованием сценариев BQ.

-- Builds one "select * replace(...)" statement as a string and runs it.
-- Every column whose name starts with vl_ is replaced by its FLOAT64 conversion
-- (decimal comma swapped for a dot); all other columns pass through unchanged.
EXECUTE IMMEDIATE (
  SELECT
    'select * replace('
    || STRING_AGG(
         'cast(replace(' || col_name || ', ",", ".") as float64) as ' || col_name,
         ', '
       )
    || ') from YourTable'
  FROM (
    -- Serialise a single row to JSON and collect the quoted names that start
    -- with vl_ and are followed by a colon, i.e. the matching JSON keys.
    SELECT REGEXP_EXTRACT_ALL(TO_JSON_STRING(t), r'"(vl_[^"]*)":') AS vl_columns
    FROM YourTable AS t
    LIMIT 1
  )
  CROSS JOIN UNNEST(vl_columns) AS col_name
);

Если применить к приведенному ниже упрощенному примеру (он все еще полностью представляет вариант использования OP):

-- Two sample rows: vl_x and vl_z carry the vl_ prefix, id and y do not.
SELECT 1 AS id, "0000006025,55" AS vl_x, "000006,42" AS y, "0000005921,09" AS vl_z
UNION ALL
SELECT 2, "0000001447,68", "000001,54", "0000001726,67"

Результат:

Вы должны нажать на VIEW RESULTS в последней строке, чтобы увидеть окончательный результат

В зависимости от того, что вы хотите делать с результатом дальше, код можно изменить так, чтобы заменить YourTable этим результатом, создать новую таблицу и т. д. См. пример такой настройки (показана только первая строка — остальные без изменений):

-- Variant: the generated statement starts with "create table NewTable as", so the result is stored in NewTable.
-- Only the first line is shown; the ". . ." line stands for the rest of the script and is not valid SQL.
execute immediate (select 'create table NewTable as select * replace(' || 
. . .    

Обратите внимание на обновление, которое я только что внес: я заметил, что из-за опечатки пропустил символ * в одном месте регулярного выражения.

Mikhail Berlyant 20.12.2020 22:30

Спасибо. Это работает. Я новичок в BQ, поэтому мне понадобилось время, чтобы понять, как запустить скрипт. Я создал файл с именем format_wage.sql с содержимым скрипта, а затем запустил его в gcloud shell: bq query --use_legacy_sql=false --flagfile=format_wage.sql. Возможно, эта информация будет полезна будущим читателям этого вопроса.

Lucas 20.12.2020 23:09

Другие вопросы по теме