digitalization

This commit is contained in:
2026-01-21 19:39:37 +03:00
parent cd5fcd3452
commit 5041e6bd51
8 changed files with 122 additions and 11 deletions

View File

@@ -37,6 +37,7 @@ def parse_nomenclature(xml: str):
df = pd.DataFrame(rows)
df = df[df['parent_key'] == 'e0eb911c-03a0-11ef-95bd-fa163e7429d8']
df['description'] = df['description'].str.extract(r'^([^\s(]+)')
df = df.drop_duplicates(subset='description')
return df
except ET.ParseError:
raise