split [选项] [输入文件] [输出文件前缀]
# List the log files whose names match logfile.*.log.
ls logfile.*.log
# Cut logfile.log into 10 MiB pieces named new_logfile_prefix_aa, _ab, ...
# NOTE: -b splits on raw byte counts, so a piece may end in the middle of a line.
split -b 10M logfile.log new_logfile_prefix_
# Sort the log lines; sort reads the file directly, so no awk pass-through is needed.
sort logfile.log > sorted_logfile.log
# Copy everything from line 2 onward, i.e. drop the first line of the sorted file.
tail -n +2 sorted_logfile.log > new_logfile.log
import os
import sys


def split_large_file(file_path, chunk_size=10 * 1024 * 1024):
    """Split *file_path* into numbered parts of roughly *chunk_size* bytes.

    The input is read line by line in binary mode and copied into files named
    ``<basename>_part<N>.txt`` (N = 1, 2, ...) in the same directory as the
    input.  A part is closed as soon as it holds at least *chunk_size* bytes,
    so lines are never cut in half and a part may exceed *chunk_size* by at
    most one line.  Concatenating the parts in order reproduces the input
    byte for byte.

    Args:
        file_path: Path of the file to split.
        chunk_size: Target size of each part in bytes (default 10 MiB).
            ``0`` puts every line into its own part.

    Returns:
        The list of part paths, in order.  Empty when the input file is
        empty (no part file is created in that case).

    Raises:
        ValueError: If *chunk_size* is negative.
        OSError: If the input cannot be read or a part cannot be written.
    """
    if chunk_size < 0:
        raise ValueError(f"chunk_size must be non-negative, got {chunk_size}")

    part_prefix = os.path.join(
        os.path.dirname(file_path), f"{os.path.basename(file_path)}_part"
    )
    part_paths = []
    output_file = None
    bytes_written = 0
    try:
        # Binary mode: sizes are real byte counts and no decoding can fail.
        with open(file_path, "rb") as input_file:
            for line in input_file:
                if output_file is None:
                    # Open the next part lazily so that no empty trailing
                    # part is left behind when the input ends on a boundary.
                    part_path = f"{part_prefix}{len(part_paths) + 1}.txt"
                    output_file = open(part_path, "wb")
                    part_paths.append(part_path)
                    bytes_written = 0
                output_file.write(line)
                bytes_written += len(line)
                # ">=" rather than an exact-multiple test: line lengths almost
                # never land exactly on the chunk boundary.
                if bytes_written >= chunk_size:
                    output_file.close()
                    output_file = None
    finally:
        # Close the part that was still open if the loop ended or failed.
        if output_file is not None:
            output_file.close()
    return part_paths


# Notes
# -----
# This is just one possible approach to splitting a large file into smaller
# chunks.  It is not necessarily the best approach for every situation, and
# there are many other ways to do it depending on your specific needs.  The
# chunk size can be adjusted as needed.
#
# Other considerations when splitting a large file into smaller chunks:
#
# * Error handling.  As written, an error while opening or writing a part is
#   raised to the caller.  If you want processing to continue even when one
#   part cannot be written, the code has to be modified.
# * Duplicate files.  To avoid creating duplicates, make sure each chunk
#   starts at a unique position in the input.  Adding a unique identifier
#   such as a timestamp or a counter to each chunk helps keep them apart.
# * Overlapping chunks.  If two chunks overlap, some data from the first
#   chunk will also appear in the second.  How to handle that depends on your
#   requirements: you could overwrite the data in the overlapping region
#   instead of appending it, or merge both chunks into a single chunk instead
#   of keeping them separate.
# * Record bookkeeping.  Writing the number of lines emitted so far into the
#   first line of the next chunk is possible, but it needs more complex code
#   and may be unnecessary depending on how the data is processed later.
# * Existing tools.  Many tools can automate splitting.  Utility libraries
#   such as Apache Commons IO offer general file-handling helpers, and
#   command-line tools such as GNU split can split files without any
#   additional code.
#
# In general, automating the split with a scripting language such as Python
# or Bash saves time and effort compared with splitting files by hand every
# time it is needed.
声明:本网站发布的内容(图片、视频和文字)以用户投稿、用户转载内容为主,如果涉及侵权请尽快告知,我们将会在第一时间删除。文章观点不代表本网站立场,如需处理请联系客服。电话:028-86922220;邮箱:631063699@qq.com。内容未经允许不得转载,或转载时需注明来源: 成都快上网