Professional Documents
Culture Documents
Farhan Al Farisi - Baca - File - Teks
Farhan Al Farisi - Baca - File - Teks
Farhan Al Farisi - Baca - File - Teks
152021045
If-Aa
Source Code
import os
import re
from collections import Counter

import PyPDF2
from docx import Document
# Word-frequency report: collect the words of every .docx, .pdf and .txt file
# in a folder, then print how often each word occurs per file.
#
# NOTE(review): the extracted listing lost its letter case, its indentation and
# the statement(s) that walk the input folder (file_name / file_path were used
# but never assigned). The folder below is an assumption - confirm it.
folder_path = "."

# Punctuation removed from every token before counting; compiled once because
# it is applied to every word of every file.
_PUNCTUATION = re.compile(r"[,\(\)/=:*.\'\“\”]")


def _clean_words(words):
    """Return *words* lower-cased with the characters in _PUNCTUATION removed."""
    return [_PUNCTUATION.sub("", word).lower() for word in words]


def _read_words(file_path):
    """Return the cleaned words of a .docx/.pdf/.txt file, or None otherwise.

    The extension test is case-insensitive so names such as ``REPORT.PDF``
    are handled as well.
    """
    lowered = file_path.lower()
    if lowered.endswith(".docx"):
        words = []
        for paragraph in Document(file_path).paragraphs:
            words.extend(paragraph.text.split())
    elif lowered.endswith(".pdf"):
        words = []
        # The context manager closes the handle once all pages are read.
        with open(file_path, "rb") as pdf_file:
            for page in PyPDF2.PdfReader(pdf_file).pages:
                # "or ''" keeps a page without extractable text from raising.
                words.extend((page.extract_text() or "").split())
    elif lowered.endswith(".txt"):
        with open(file_path, "r") as text_file:
            words = text_file.read().split()
    else:
        return None
    return _clean_words(words)


listfile = []        # every regular file found in the folder
document_kata = {}   # file name -> list of cleaned words
namafile = list()    # names of the files that were actually parsed

for file_name in os.listdir(folder_path):
    file_path = os.path.join(folder_path, file_name)
    if not os.path.isfile(file_path):
        continue
    listfile.append(file_name)

    kata_set = _read_words(file_path)
    if kata_set is None:
        # Unsupported extension: listed in listfile but not counted.
        continue
    document_kata[file_name] = kata_set
    namafile.append(file_name)

for file_name, kata_set in document_kata.items():
    ext = os.path.splitext(file_name)[1]
    print(f"Nama File: {file_name}")
    print(f"Extensi File:{ext}")
    for kata, kemunculan in Counter(kata_set).items():
        print(f"{kata} ==> {kemunculan}")
    print()
Print Screen
Output Program