GMashtalyar

Загрузка данных с почты | Fetching email data

Скачиваем xlsx в Documents папку проекта

def fetch_email_data():
    username = "username"
    password = "password"
    imap_server = "email_server"
    imap = imaplib.IMAP4_SSL(imap_server)
    imap.login(username, password)
    status, messages = imap.select("INBOX")
    messages_number = 1
    messages = int(messages[0])
    for i in range(messages, messages - messages_number, -1):
        res, msg = imap.fetch(str(i), "(RFC822)")  # fetch the email message by ID
        for response in msg:
            if isinstance(response, tuple):
                msg = email.message_from_bytes(response[1])  # parse a bytes email into a message object
                subject, encoding = decode_header(msg["Subject"])[0]  # decode the email subject
                if isinstance(subject, bytes):
                    subject = subject.decode(encoding)  # if it's a bytes, decode to str
                From, encoding = decode_header(msg.get("From"))[0]  # decode email sender
                if isinstance(From, bytes):
                    From = From.decode(encoding)
                print("Subject:", subject)
                print("From:", From)
                if msg.is_multipart():  # if the email message is multipart
                    for part in msg.walk():  # iterate over email parts
                        content_type = part.get_content_type()  # extract content type of email
                        content_disposition = str(part.get("Content-Disposition"))
                        try:
                            body = part.get_payload(decode=True).decode()  # get the email body
                        except:
                            pass
                        if "attachment" in content_disposition:
                            filepath = os.path.join(os.getcwd(), "Documents", "email_data.xlsx")
                            print(f"email{i}")
                            open(filepath, "wb").write(part.get_payload(decode=True))
                print("=" * 100)
    imap.close()
    imap.logout()

Обработка данных excel | Formatting excel file

Подготовка загрузки данных в модель с помощью openpyxl & Pandas .

def reformat_excel(file_path: str, number_of_rows: int):
    """deleting first rows"""
    wb = load_workbook(file_path)
    sheet = wb.active
    for _ in range(number_of_rows):
        sheet.delete_rows(1)
    wb.save('Documents/email.xlsx')

def manipulate_upload():
    raw_data = read_excel('Documents/email.xlsx', dtype={'Контрагент.ИНН': str})
    raw_data.drop(columns=['Клиент', 'Unnamed: 1', 'Unnamed: 2', 'Unnamed: 3', 'Unnamed: 6'], inplace=True)
    raw_data.rename(columns={'Контрагент.ИНН': 'client_inn', 'Долг клиента': 'client_debt',
                         'Долг клиента (просрочено)': 'client_late_debt'}, inplace=True)
    print(raw_data.head(15))
    print(raw_data.dtypes)
    print(f"Data type of column 'client_inn': {raw_data['client_inn'].dtype}")

Обновление модели | Bulk update on model

Обновление модели

@transaction.atomic
def match_model_excel():
    raw_data = read_excel('Documents/email.xlsx', dtype={'Контрагент.ИНН': str})
    raw_data.drop(columns=['Клиент', 'Unnamed: 1', 'Unnamed: 2', 'Unnamed: 3', 'Unnamed: 6'], inplace=True)
    raw_data.rename(columns={'Контрагент.ИНН': 'client_inn', 'Долг клиента': 'client_debt',
                         'Долг клиента (просрочено)': 'client_late_debt'}, inplace=True)
    clients = ClientsModel.objects.all()

    updates = []
    for client in clients:
        matching_row = raw_data[raw_data['client_inn'] == str(client.client_inn)]
        if not matching_row.empty:
            client.client_debt = matching_row['client_debt'].iloc[0] if pd.notna(
                matching_row['client_debt'].iloc[0]) else 0
            client.client_late_debt = matching_row['client_late_debt'].iloc[0] if pd.notna(
                matching_row['client_late_debt'].iloc[0]) else 0
        else:
            client.client_debt = 0
            client.client_late_debt = 0
        updates.append(client)
    ClientsModel.objects.bulk_update(updates, ['client_debt', 'client_late_debt'])