"""Fetch, parse and cache the nomenclature list and map articles to ref keys."""

import xml.etree.ElementTree as ET
from base64 import b64encode
from functools import lru_cache

import pandas as pd
import requests

from server.backend.schemas.pydantic import settings
from server.backend.api.session import get_session

# Shared HTTP session; the service is asked for an XML response.
session = get_session({
    "Accept": "application/xml",
})

# XML namespaces used by the feed (Atom + Microsoft ADO data services).
_NS = {
    "atom": "http://www.w3.org/2005/Atom",
    "m": "http://schemas.microsoft.com/ado/2007/08/dataservices/metadata",
    "d": "http://schemas.microsoft.com/ado/2007/08/dataservices",
}

# Parent group whose children are kept by default in ``parse_nomenclature``.
_DEFAULT_PARENT_KEY = 'e0eb911c-03a0-11ef-95bd-fa163e7429d8'

# Column order of the frame built by ``parse_nomenclature``.
_COLUMNS = ['ref_key', 'description', 'parent_key']


def fetch_nomenclature() -> str:
    """Download the nomenclature feed and return the response body as text.

    Raises:
        requests.HTTPError: if the server answers with an error status
            (via ``raise_for_status``).
    """
    response = session.get(settings.URL_NOMENCLATURE, timeout=10)
    response.raise_for_status()
    return response.text


def parse_nomenclature(xml: str, parent_key: str = _DEFAULT_PARENT_KEY) -> pd.DataFrame:
    """Parse the nomenclature XML feed into a DataFrame.

    Args:
        xml: XML document containing ``atom:entry`` elements.
        parent_key: only entries whose ``d:Parent_Key`` equals this value
            are kept.

    Returns:
        DataFrame with columns ``ref_key``, ``description`` and
        ``parent_key``. ``description`` is reduced to its leading run of
        characters up to the first whitespace or ``(``.

    Raises:
        xml.etree.ElementTree.ParseError: if ``xml`` is not well-formed.
    """
    # Parse the XML response.
    root = ET.fromstring(xml)

    # Extract the fields of interest from every entry.
    rows = []
    for entry in root.findall('atom:entry', _NS):
        properties = entry.find('atom:content/m:properties', _NS)
        if properties is None:
            # Without properties there is no Parent_Key, so the entry could
            # never pass the filter below; skip it instead of crashing.
            continue
        rows.append({
            'ref_key': properties.findtext('d:Ref_Key', default=None, namespaces=_NS),
            'description': properties.findtext('d:Description', default=None, namespaces=_NS),
            'parent_key': properties.findtext('d:Parent_Key', default=None, namespaces=_NS),
        })

    # Explicit columns keep the schema stable even when the feed is empty.
    df = pd.DataFrame(rows, columns=_COLUMNS)

    # Copy the filtered slice so the assignment below writes to an
    # independent frame rather than a view of ``df``.
    df = df.loc[df['parent_key'] == parent_key].copy()
    df['description'] = df['description'].str.extract(r'^([^\s(]+)', expand=False)
    return df


@lru_cache(maxsize=1)
def nomenclature(flag=False):
    """Return the parsed nomenclature, fetching it on a cache miss.

    Args:
        flag: when true, also dump the frame to
            ``./excel_files/nomenclature.xlsx``.

    Returns:
        The DataFrame produced by ``parse_nomenclature``.

    Note:
        The result is cached per distinct call signature with room for a
        single entry, so alternating between different ways of passing
        ``flag`` evicts the cache and triggers a new fetch. The Excel file
        is only written on a cache miss. The returned frame is the cached
        object itself; callers should not mutate it in place.
    """
    xml_data = fetch_nomenclature()
    df = parse_nomenclature(xml_data)
    if flag:
        df.to_excel("./excel_files/nomenclature.xlsx")
    return df


def processing(df):
    """Attach ``ref_key`` to every row of ``df`` by matching its ``arti`` value.

    Args:
        df: DataFrame with an ``arti`` column that is compared against the
            nomenclature ``description`` values.

    Returns:
        ``df`` left-joined with the nomenclature, gaining a ``ref_key``
        column.

    Raises:
        ValueError: if any ``arti`` value has no match in the nomenclature.
    """
    # Rename the lookup key to ``arti`` so no temporary ``description``
    # column is added; this also keeps the merge working when ``df`` already
    # has its own ``description`` column.
    lookup = nomenclature()[['description', 'ref_key']].rename(
        columns={'description': 'arti'}
    )
    # NOTE(review): if several nomenclature rows share the same shortened
    # description, this left join duplicates the matching rows of ``df``.
    # Confirm whether descriptions are unique.
    result = df.merge(lookup, on='arti', how='left')

    not_matched = result.loc[result['ref_key'].isna(), 'arti'].unique()
    if len(not_matched) > 0:
        raise ValueError(f'Не найдены значения: {not_matched}')
    return result