@comment{
  Journal article: Sustov and Zaitseva-Parnaste (2025), TransNav vol. 19, no. 4.
  Maintenance notes:
  - The citation key is kept exactly as exported (it contains non-ASCII
    characters) so that existing cite commands keep resolving.
  - Acronyms in the title are brace-protected against style sentence-casing.
  - Percent signs in the abstract are backslash-escaped so the text survives
    being typeset by styles that print abstracts.
  - NOTE(review): the url value is a relative path as exported; presumably it
    is relative to the journal web site -- confirm and replace with the
    absolute URL, or rely on the doi field for resolution.
}
@article{Šustov_Zaitseva-Pärnaste_2025,
  author    = {Šustov, Kirill and Zaitseva-Pärnaste, Inga},
  title     = {Optimizing {AIS} Data Format Based on {HELCOM} Datasets},
  journal   = {TransNav, the International Journal on Marine Navigation and Safety of Sea Transportation},
  volume    = {19},
  number    = {4},
  pages     = {1189--1194},
  year      = {2025},
  doi       = {10.12716/1001.19.04.16},
  url       = {./Article_Optimizing_AIS_Data_Format_Based_Šustov,76,1606.html},
  issn      = {2083-6473},
  publisher = {Gdynia Maritime University, Faculty of Navigation},
  abstract  = {Automatic Identification System (AIS) data plays a vital role in a wide range of maritime research areas, including logistics optimization, navigational safety analysis, economic activity monitoring, and environmental impact assessment.
    The HELCOM (Helsinki Commission) organization collects and maintains extensive AIS data for the Baltic Sea region, offering researchers valuable insights into vessel movement and marine traffic patterns.
    However, the raw AIS data (typically provided in CSV plaintext format) is often large and inefficient to store due to a) plain-text redundancy, b) high levels of duplication and repetitive information.
    For effective storage and transmission, AIS data is usually compressed as it is, using widely used compression tools (e.g. zip archive).
    In this study, we investigate techniques for optimizing the storage of HELCOM AIS data by manipulations of data format and structure.
    Our research reveals that after the undertaken steps, the size of the uncompressed dataset decreased by approx. 60\%; the compressed dataset size decreased by approx. 90\% compared to the original, revealing the potential for substantial storage savings.
    To further improve data handling, we experimented with various structural optimizations of the CSV format, including data arranging by core attributes, column ordering optimization, dataset normalization involving the segregation of mutable and immutable parts.
    For example, vessel-specific attributes such as ship name, MMSI (Maritime Mobile Service Identity) code, IMO (International Maritime Organization), origin, and dimensions, which stay the same across records for a vessel, can be moved into a separate file during normalization, which significantly reduces the dataset size.
    The article compares several AIS data persisting strategies to identify the most memory-efficient approaches.
    Furthermore, we introduce a data generation tool that produces synthetic AIS datasets in customizable formats and patterns.
    This tool enables reproducibility of the study and supports further experimentation with AIS data optimization approaches.},
}