62 lines
2.5 KiB
Python
62 lines
2.5 KiB
Python
import requests
|
||
import xml.etree.ElementTree as ET
|
||
from base64 import b64encode
|
||
from server.backend.schemas.pydantic import settings
|
||
from functools import lru_cache
|
||
import pandas as pd
|
||
from server.backend.api.session import get_session
|
||
|
||
# Module-wide HTTP session used by fetch_nomenclature(); every request made
# through it asks the server for an XML response body.
# NOTE(review): get_session is project-local — presumably it returns a
# requests-style session pre-loaded with these headers; confirm.
session = get_session({"Accept": "application/xml"})
|
||
|
||
def fetch_nomenclature():
    """Download the nomenclature feed and return the response body as text.

    The request goes to ``settings.URL_NOMENCLATURE`` through the module-level
    ``session`` and gives up after a 10 second timeout.

    Raises:
        Whatever ``raise_for_status()`` raises for an unsuccessful response
        (presumably ``requests.HTTPError`` — depends on what ``get_session``
        returns), plus any transport error raised by ``session.get``.
    """
    reply = session.get(settings.URL_NOMENCLATURE, timeout=10)
    # Fail loudly on an error status instead of handing an error page to the parser.
    reply.raise_for_status()
    return reply.text
|
||
|
||
def parse_nomenclature(
    xml: str,
    parent_key: str = 'e0eb911c-03a0-11ef-95bd-fa163e7429d8',
) -> pd.DataFrame:
    """Parse an Atom/OData nomenclature feed into a de-duplicated DataFrame.

    Every ``atom:entry`` that carries ``atom:content/m:properties`` yields one
    row built from its ``d:Ref_Key``, ``d:Description`` and ``d:Parent_Key``
    children (``None`` when a child is absent). Rows are then restricted to
    ``parent_key``, each description is shortened to its leading run of
    characters up to the first whitespace or ``(``, and only the first row per
    shortened description is kept.

    Args:
        xml: The feed document as text.
        parent_key: ``Parent_Key`` value a row must have to be kept. Defaults
            to the key that used to be hard-coded in this function.

    Returns:
        A DataFrame with the columns ``ref_key``, ``description`` and
        ``parent_key``. It is empty (but still has those columns) when the
        feed has no matching entries.

    Raises:
        xml.etree.ElementTree.ParseError: If ``xml`` is not well-formed XML.
    """
    ns = {
        "atom": "http://www.w3.org/2005/Atom",
        "m": "http://schemas.microsoft.com/ado/2007/08/dataservices/metadata",
        "d": "http://schemas.microsoft.com/ado/2007/08/dataservices",
    }
    columns = ['ref_key', 'description', 'parent_key']

    # Parse the XML response; a ParseError propagates to the caller unchanged.
    root = ET.fromstring(xml)

    # Extract the fields of interest from every entry.
    rows = []
    for entry in root.findall('atom:entry', ns):
        properties = entry.find('atom:content/m:properties', ns)
        if properties is None:
            # Entry without a content/properties payload: nothing to read, so
            # skip it rather than crash on ``None.find``.
            continue
        rows.append({
            'ref_key': properties.findtext('d:Ref_Key', default=None, namespaces=ns),
            'description': properties.findtext('d:Description', default=None, namespaces=ns),
            'parent_key': properties.findtext('d:Parent_Key', default=None, namespaces=ns),
        })

    # Explicit columns keep the filter below valid even when ``rows`` is empty.
    frame = pd.DataFrame(rows, columns=columns)

    # Copy the filtered rows so the column assignment below writes to an
    # independent frame instead of a slice of ``frame``.
    selected = frame[frame['parent_key'] == parent_key].copy()
    selected['description'] = selected['description'].str.extract(
        r'^([^\s(]+)', expand=False
    )
    return selected.drop_duplicates(subset='description')
|
||
@lru_cache(maxsize=1)
def nomenclature(flag=False):
    """Fetch and parse the nomenclature, optionally exporting it to Excel.

    The result is memoised by ``lru_cache`` with a single slot keyed on the
    call arguments, so a repeated call with the same argument returns the
    same DataFrame object without fetching again (and without writing the
    Excel file again).

    NOTE(review): because the cache key includes ``flag`` and only one entry
    is kept, alternating between different ``flag`` values evicts the cached
    entry and triggers a new download each time.

    Args:
        flag: When true, the parsed table is also written to
            ``./excel_files/nomenclature.xlsx``.

    Returns:
        The DataFrame produced by ``parse_nomenclature``.
    """
    table = parse_nomenclature(fetch_nomenclature())
    if not flag:
        return table
    table.to_excel("./excel_files/nomenclature.xlsx")
    return table
|
||
def processing(df, nomenclature_df=None):
    """Attach the nomenclature ``ref_key`` to each row of ``df`` by article.

    ``df['arti']`` is left-joined against the ``description`` column of the
    nomenclature table, and the matching ``ref_key`` is added as a new column.

    Args:
        df: DataFrame with an ``arti`` column holding the article codes.
        nomenclature_df: Optional lookup table with ``description`` and
            ``ref_key`` columns. When omitted, ``nomenclature()`` is called.

    Returns:
        A new DataFrame with the columns of ``df`` followed by ``ref_key``.

    Raises:
        ValueError: If at least one ``arti`` value has no matching
            description; the message lists the unmatched values.
    """
    reference = nomenclature() if nomenclature_df is None else nomenclature_df

    # Join on a renamed copy of the lookup key: if ``df`` itself has a
    # ``description`` column, merging on the original name would suffix both
    # columns and the drop below would fail with KeyError.
    join_key = '__nomenclature_description__'
    lookup = reference[['description', 'ref_key']].rename(
        columns={'description': join_key}
    )

    result = df.merge(
        lookup,              # only the lookup key and ref_key from the reference
        left_on='arti',      # comparison column in df
        right_on=join_key,   # comparison column in the reference
        how='left',          # keep every row of df
    ).drop(columns=join_key)  # remove the temporary join column

    not_matched = result.loc[result['ref_key'].isna(), 'arti'].unique()
    if len(not_matched) > 0:
        raise ValueError(f'Не найдены значения: {not_matched}')
    return result