Для этого используем снежинку:
У меня есть запрос, который создает очень простое объединение таблиц из 5 разных источников данных:
WITH personal_info_workday AS (
SELECT
'Workday' AS source,
CAST(w.worker_code AS string) AS employee_id,
w.business_site_summary_name AS office_location
FROM
prod_raw.workday_prod.worker w
),
personal_info_bamboo AS (
SELECT
'Bamboo' AS source,
NULL AS employee_id,
NULL AS office_location
FROM
prod_raw.bamboohr.employee e
),
personal_info_adp AS (
SELECT
'ADP' AS source,
CAST(w.id AS string) AS employee_id,
NULL AS office_location
FROM
prod_raw.adp_workforce_now.worker w
),
personal_info_salesforce AS (
SELECT
'Salesforce' AS source,
CAST(c.workday_id__c AS string) AS employee_id,
c.office_location__c AS office_location
FROM
prod_raw.salesforce_corp_hub.contact c
WHERE
c.full_time_part_time__c = 'Contractor'
),
personal_info_mihi AS (
SELECT
'Mihi' AS source,
CAST(e.employee_number AS string) AS employee_id,
NULL AS office_location
FROM
prod_raw.mihi.employees e
)
SELECT * FROM personal_info_workday
UNION ALL
SELECT *
FROM personal_info_salesforce s
WHERE NOT EXISTS (
SELECT 1
FROM personal_info_workday w
WHERE s.employee_id = w.employee_id
)
UNION ALL
SELECT * FROM personal_info_bamboo
UNION ALL
SELECT * FROM personal_info_adp
UNION ALL
SELECT * FROM personal_info_mihi
ORDER BY employee_id asc
Он создает следующую таблицу:
Каждый раз, когда в workday
и salesforce
[источнике] одновременно встречается экземпляр номера сотрудника (ID 57), я хочу, чтобы возвращалась только строка workday
, отфильтруйте дубликат salesforce
. Любой другой экземпляр номера сотрудника (например, ADP, также имеющий идентификатор 57), просто оставьте его как есть.
Вот как я хочу, чтобы таблица выглядела:
Я пытался использовать это, но оно не было правильно консолидировано:
SELECT *
FROM personal_info_salesforce s
WHERE NOT EXISTS (
SELECT 1
FROM personal_info_workday w
WHERE s.employee_id = w.employee_id
)
Вот запрос, чтобы показать данные объединения для чего-то вроде скрипта БД:
CREATE TABLE employee_office_location (
source VARCHAR(50),
employee_id VARCHAR(50),
office_location INT,
PRIMARY KEY (source, employee_id) -- Ensures each (source, employee_id) pair is unique
);
INSERT INTO employee_office_location (source, employee_id, office_location) VALUES
('ADP', '1', 1),
('Salesforce', '57', 2),
('Workday', '57', 2),
('Bamboo', '58', 3),
('Salesforce', '59', 4),
('Mihi', '1010013461', 5);
Есть идеи?
@NidenK можешь ли ты проголосовать за ответ, раз уж ты его уже принял
with main as (
SELECT 'ADP' AS source, '57' AS employee_id, 1 AS office_location
UNION ALL
SELECT 'Salesforce', '57', 2
UNION ALL
SELECT 'Workday', '57', 2
UNION ALL
SELECT 'Bamboo', '58', 3
UNION ALL
SELECT 'Salesforce', '59', 4
UNION ALL
SELECT 'Mihi', '1010013461', 5
),
test as (
select *, list_agg(source) over(partition by employee_id) as list_of_sources
from main
),
logic as (
select *,
if (list_of_sources like '%Salesforce%' and list_of_sources like '%Workday%', true,false) as is_list_salesforce_and_workday,
if (list_of_sources like '%Salesforce%' and list_of_sources like '%Workday%' and source = 'Salesforce', true,false) as is_list_salesforce_and_workday_and_source_is_salesforce
from test
)
select source, employee_id, office_location from logic where not is_list_salesforce_and_workday OR not is_list_salesforce_and_workday_and_source_is_salesforce
order by employee_id
По сути, это дубликат stackoverflow.com/questions/78871429/…