89 lines
2.9 KiB
Python
89 lines
2.9 KiB
Python
import os
|
|
import requests
|
|
import shutil
|
|
import openai
|
|
from openai import OpenAI
|
|
import re
|
|
|
|
# Shared OpenAI client used by the rest of this module; the API key is read
# from the CHATGPT_TOKEN environment variable (None when it is not set).
client = OpenAI(api_key=os.environ.get("CHATGPT_TOKEN"))
|
|
|
|
|
|
def sanitize_filename(name):
    """Return ``name`` with characters that are not allowed in file names removed.

    The characters dropped are the backslash, the forward slash, ``*``, ``?``,
    ``:``, the double quote, ``<``, ``>`` and ``|``. Everything else is kept
    unchanged and in its original order.
    """
    forbidden_chars = re.compile(r'[\\/*?:"<>|]')
    cleaned = forbidden_chars.sub("", name)
    return cleaned
|
|
|
|
|
|
def generate_pdf_name(file_content):
    """Ask the chat model for a document title and turn it into a file name.

    Newlines in ``file_content`` are replaced by spaces and only the first 500
    characters are sent to the model. The reply is passed through
    ``sanitize_filename`` and given a ``.txt`` suffix. Uses the module-level
    ``client``.

    Args:
        file_content: Text of the document to name.

    Returns:
        A file name ending in ``.txt``. When no usable suggestion is
        available one of these sentinel names is returned instead:

        * ``"No_Response.txt"`` - the reply had no choices or no text.
        * ``"Unable_To_Suggest_Name.txt"`` - the model declined, or nothing
          was left of the suggestion after sanitizing.
        * ``"Request_Failed.txt"`` - the API call raised; details are printed.
    """
    failure_name = "Request_Failed.txt"
    formatted_content = file_content.replace("\n", " ")

    # NOTE(review): the document text travels in the system message, the user
    # message is empty and the trailing assistant message looks like a
    # one-shot example of the desired output - confirm this prompt shape is
    # intended.
    try:
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {
                    "role": "system",
                    "content": "Suggest a new name for a document with the following content: "
                    + formatted_content[:500],
                },
                {"role": "user", "content": ""},
                {
                    "role": "assistant",
                    "content": '"Energetic Greetings: An Expressive Salutation"',
                },
            ],
            temperature=1,
            max_tokens=256,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
        )
    # Most specific handlers first: RateLimitError is a kind of APIStatusError,
    # and a leading ``except Exception`` would make every later clause dead.
    except openai.RateLimitError:
        print("A 429 status code was received; we should back off a bit.")
        return failure_name
    except openai.APIStatusError as e:
        print("Another non-200-range status code was received")
        print(e.status_code)
        print(e.response)
        return failure_name
    except openai.APIConnectionError as e:
        print("The server could not be reached")
        print(e.__cause__)  # an underlying Exception, likely raised within httpx.
        return failure_name
    except Exception as e:
        # Keep the original "never raise" contract for anything unexpected,
        # but report it for what it is instead of as a connection problem.
        print("Unexpected error while requesting a name:", repr(e))
        return failure_name

    print("Response Object:", response)

    if not (response and response.choices):
        return "No_Response.txt"

    raw_suggestion = response.choices[0].message.content
    if raw_suggestion is None:
        # The message carried no text, so there is nothing to build a name from.
        return "No_Response.txt"

    suggested_name = raw_suggestion.strip()
    # Compare case-insensitively so "Unable to suggest ..." is caught as well.
    if "unable to suggest" in suggested_name.lower():
        return "Unable_To_Suggest_Name.txt"

    # Sanitize the suggested file name
    sanitized_name = sanitize_filename(suggested_name).strip()
    if not sanitized_name:
        # Avoid producing a bare ".txt" when nothing survives sanitizing.
        return "Unable_To_Suggest_Name.txt"
    return sanitized_name + ".txt"
|
|
|
|
|
|
def _unique_destination(directory, file_name):
    """Return a path for ``file_name`` inside ``directory`` that does not exist yet."""
    candidate = os.path.join(directory, file_name)
    stem, extension = os.path.splitext(file_name)
    counter = 1
    while os.path.exists(candidate):
        candidate = os.path.join(directory, f"{stem}_{counter}{extension}")
        counter += 1
    return candidate


def main(content_dir="content/", cleaned_content_dir="cleaned-content/"):
    """Copy every ``.txt`` file from ``content_dir`` under a generated name.

    Each regular file whose name ends in ``.txt`` is read, a new name is
    requested from ``generate_pdf_name`` and the file is copied into
    ``cleaned_content_dir`` under that name. The source files are left in
    place.

    Args:
        content_dir: Directory holding the documents to rename.
        cleaned_content_dir: Directory receiving the renamed copies; it is
            created when missing.

    Raises:
        FileNotFoundError: If ``content_dir`` does not exist.
    """
    # Create the cleaned content directory if it doesn't exist
    os.makedirs(cleaned_content_dir, exist_ok=True)

    # Process each file in the content directory (sorted so that the numeric
    # suffixes added on name collisions are assigned in a stable order).
    for filename in sorted(os.listdir(content_dir)):
        file_path = os.path.join(content_dir, filename)
        # Skip other extensions and directories that merely end in ".txt".
        if not filename.endswith(".txt") or not os.path.isfile(file_path):
            continue

        # Read the content of the file. The text is only used to prompt for a
        # name, so undecodable bytes are replaced rather than aborting the run.
        with open(file_path, "r", encoding="utf-8", errors="replace") as file:
            file_content = file.read()

        # Generate a new name for the document
        new_name = generate_pdf_name(file_content)
        if not new_name:
            # No name came back (e.g. the request failed); joining None into
            # a path would raise TypeError, so skip this file instead.
            print(f"Skipped '{filename}': no name could be generated")
            continue

        # Copy the file to the cleaned-content directory without overwriting
        # an earlier copy that received the same name.
        new_file_path = _unique_destination(cleaned_content_dir, new_name)
        shutil.copy(file_path, new_file_path)

        print(f"Copied and renamed '{filename}' to '{os.path.basename(new_file_path)}'")
|
|
|
|
|
|
# Run the renaming job only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    main()
|